#!/bin/bash

echo "$0: START"

## Trace every command and abort on the first unhandled failure.
set -x
set -e

## Shared helper library. check_vars_exist and missing_variable are not
## defined in this file; presumably they are provided by this library.
source /usr/share/mediawiki-shell/common

## These variables should be set by the calling script as environment variables.
## example:
#[[ -v SOURCE_WIKI_URL ]] || SOURCE_WIKI_URL='https://www.whonix.org/w'
check_vars_exist SOURCE_WIKI_URL QUERY_TYPE

## The API endpoint defaults to api.php below the wiki URL unless the caller
## already provided SOURCE_TARGET_API.
[[ -v SOURCE_TARGET_API ]] || SOURCE_TARGET_API="${SOURCE_WIKI_URL}/api.php"

## Parameter 1 (the allpages_file name) is mandatory.
## The argument count is tested instead of '[[ -v 1 ]]' because '-v' only
## understands positional parameters on newer bash releases; on older ones
## that test is always false, even when the parameter was given.
[[ "$#" -ge 1 ]] || missing_variable "parameter 1 needs to be set to the allpages_file name. example: $0 /tmp/allpages.txt"

## Namespaces queried by default, unless the caller already set the variable.
## Namespace numbers are documented at:
## https://www.mediawiki.org/wiki/Manual:Namespace
## The namespaces of a specific wiki can be listed through its API:
## https://www.kicksecure.com/w/api.php?action=query&meta=siteinfo&siprop=namespaces
## https://www.whonix.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces
if [[ ! -v wiki_namespace_list_default ]]; then
   wiki_namespace_list_default="0 4 6 8 10 12 14"
fi

## Additional namespaces; empty unless the caller provides them.
## NOTE to self by Patrick: also set by other script:
## wiki-backup-with-mediawiki-shell
## 500: site-specific namespace Moved
## 274: site-specific namespace Widgets
#[[ -v wiki_namespace_list_extra ]] || wiki_namespace_list_extra="500 274"
if [[ ! -v wiki_namespace_list_extra ]]; then
   wiki_namespace_list_extra=""
fi

## Optional pattern for the sanity test at the end of this script; an empty
## value means that the test is not configured.
if [[ ! -v wiki_article_must_include_sanity_test ]]; then
   wiki_article_must_include_sanity_test=""
fi

## Translate QUERY_TYPE into the API list module and the parameter names
## that module uses for namespace selection, continuation and result limit.
## An empty keyword means that the module has no such parameter.
api_extra_args=""
case "$QUERY_TYPE" in
   allpages)
      list=allpages
      namespace_keyword=apnamespace
      api_continue_keyword=apcontinue
      api_limit_keyword_and_content="aplimit=500"
      ;;
   unreviewedpages)
      list=unreviewedpages
      namespace_keyword=urnamespace
      api_continue_keyword=urstart
      api_limit_keyword_and_content="urlimit=500"
      ;;
   querypage)
      list=querypage
      namespace_keyword=""
      api_continue_keyword=""
      api_limit_keyword_and_content="qplimit=500"
      api_extra_args="&qppage=Unusedimages"
      ## Upstream does not support name spaces for this API call.
      ## The namespace lists are therefore reduced to a single entry, which
      ## overrides anything the caller configured.
      wiki_namespace_list_extra=""
      wiki_namespace_list_default=0
      ## https://www.kicksecure.com/w/api.php?action=query&list=querypage&qppage=Unusedimages
      ;;
   *)
      ## Name every supported value and report on stderr like the other
      ## errors of this script.
      echo "$0: ERROR: QUERY_TYPE must be set to one of: allpages, unreviewedpages, querypage (got: '$QUERY_TYPE')" >&2
      exit 1
      ;;
esac

## Parameter 1 names the file that receives the collected titles.
allpages_file="$1"

## Remove any earlier result because titles are appended to this file later.
rm -f "$allpages_file"

## Report the effective configuration, one line per setting.
printf '%s\n' \
   "$0: INFO: allpages_file              : $allpages_file" \
   "$0: INFO: wiki_namespace_list_default: $wiki_namespace_list_default" \
   "$0: INFO: wiki_namespace_list_extra  : $wiki_namespace_list_extra" \
   "$0: INFO: SOURCE_WIKI_URL  : $SOURCE_WIKI_URL" \
   "$0: INFO: SOURCE_TARGET_API: $SOURCE_TARGET_API" \
   "$0: INFO: QUERY_TYPE: $QUERY_TYPE"

## Not required for public wiki.
#mw-logout
#mw-login

## Query every configured namespace and append the returned page titles to
## "$allpages_file", following API continuation until the server no longer
## returns a continuation token.
## The namespace lists are intentionally unquoted: they are space separated
## and have to be split into individual namespace numbers.
for wiki_namespace_item in $wiki_namespace_list_extra $wiki_namespace_list_default ; do
   echo "$0: INFO: wiki_namespace_item: $wiki_namespace_item"

   ## Percent-encoded continuation token; empty for the first request.
   api_continue_or_not=""

   while true ; do
      ## Assemble the request URL. Parameters whose keyword is empty (query
      ## types without namespace or continuation support) are left out, so
      ## that no nameless parameters such as "&=0" are sent. The continuation
      ## parameter is only added once the server handed out a token.
      api_full_link="${SOURCE_TARGET_API}?format=json&action=query&list=${list}"
      if [ -n "$namespace_keyword" ]; then
         api_full_link+="&${namespace_keyword}=${wiki_namespace_item}"
      fi
      api_full_link+="&${api_limit_keyword_and_content}"
      if [ -n "$api_continue_keyword" ] && [ -n "$api_continue_or_not" ]; then
         api_full_link+="&${api_continue_keyword}=${api_continue_or_not}"
      fi
      api_full_link+="${api_extra_args}"

      echo "$0: INFO: api_full_link:"
      echo "$api_full_link"

      ## $curl and $curl_opts are not set in this file; presumably they come
      ## from the sourced common file. They are left unquoted so that they can
      ## expand to several words. Under 'set -e' a failing download aborts
      ## the script.
      query_result=$(\
         $curl \
            $curl_opts \
            "$api_full_link"
      )

      ## Select the jq filter that extracts the titles for this query type.
      ## NOTE(review): allpages and unreviewedpages titles are written shell
      ## quoted (@sh) while querypage titles are written raw. This difference
      ## is kept as is; confirm that consumers of the file expect it.
      case "$QUERY_TYPE" in
         allpages|unreviewedpages)
            jq_title_filter='.query[$list][] | .title | @sh'
            ;;
         querypage)
            jq_title_filter='.query.querypage.results[] | .title'
            ;;
         *)
            error "Not implemented!"
            break
            ;;
      esac

      ## Append the titles. A response without the expected structure (for
      ## example an API error) is reported but does not abort the run; the
      ## sanity checks at the end of the script catch an empty result.
      if ! printf '%s\n' "$query_result" | jq -r --arg list "$list" "$jq_title_filter" >> "$allpages_file" ; then
         echo "$0: WARNING: could not extract titles from the API response for wiki_namespace_item: $wiki_namespace_item" >&2
      fi

      ## Query types without a continuation keyword are fetched only once.
      if [ -z "$api_continue_keyword" ]; then
         break
      fi

      ## If jq exits non-zero, no longer continue.
      ## If jq exits zero, api_continue_or_not holds the next continuation
      ## token, percent-encoded (@uri) so that characters such as '+' or '&'
      ## in a title cannot corrupt the next request URL.
      if ! api_continue_or_not=$(printf '%s\n' "$query_result" | jq -r --arg key "$api_continue_keyword" '.continue[$key] // empty | @uri') ; then
         break
      fi
      ## No token in the response means that this namespace is complete.
      if [ -z "$api_continue_or_not" ]; then
         break
      fi
   done
done

## Final verification of the collected title list.

## The list file has to exist. Fail with a message instead of relying on
## 'set -e' to abort silently.
if ! test -f "$allpages_file" ; then
   echo "$0 ERROR: allpages_file '$allpages_file' does not exist!" >&2
   exit 1
fi

## The list must not be empty. The content is kept in result_test because it
## is also printed when the sanity test below fails.
result_test="$(cat "$allpages_file")"
if [ "$result_test" = "" ]; then
   echo "$0 ERROR: result_test is empty!" >&2
   exit 1
fi

if [ "$wiki_article_must_include_sanity_test" = "" ]; then
   echo "$0 INFO: wiki_article_must_include_sanity_test not configured, ok."
else
   ## Search the file directly instead of piping the whole list through echo.
   ## '-e' keeps a pattern that starts with '-' from being parsed as a grep
   ## option. The match is case-insensitive.
   if grep -q -i -e "$wiki_article_must_include_sanity_test" -- "$allpages_file" ; then
      echo "$0 INFO: result_test does contain wiki_article_must_include_sanity_test '$wiki_article_must_include_sanity_test', ok. "
   else
      echo "$0 ERROR: See script source code. result_test does not contain '$wiki_article_must_include_sanity_test'! allpages_file: '$allpages_file'" >&2
      echo "" >&2
      echo "$0 result_test: '$result_test'" >&2
      echo "" >&2
      exit 1
   fi
fi
