#!/bin/bash

## Copyright (C) 2022 - 2023 ENCRYPTED SUPPORT LP <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## Uncomment to enable xtrace. The `true "..."` status lines used in this
## script produce no output of their own; they only show up in the trace.
#set -x

## Strict mode:
## - errexit:  abort as soon as an unguarded command fails
## - nounset:  treat expansion of an unset variable as an error
## - errtrace: let functions and subshells inherit the ERR trap
## - pipefail: a pipeline fails if any of its stages fails
set -o errexit -o nounset -o errtrace -o pipefail

true "$0: START."

### XXX: hardcoded
## Root of the source tree that gets scanned.
folder=~/derivative-maker
## Helper executable that performs the scan; it lives inside that tree.
wrapper="$folder/packages/kicksecure/helper-scripts/usr/bin/grep-find-unicode-wrapper"

## Check the prerequisites explicitly. A bare `test` would abort the script
## under errexit without printing anything, leaving the caller with an
## unexplained exit status 1.
if ! test -d "$folder"; then
   echo "$0: ERROR: folder '$folder' does not exist or is not a directory." >&2
   exit 1
fi

if ! test -x "$wrapper"; then
   echo "$0: ERROR: wrapper '$wrapper' does not exist or is not executable." >&2
   exit 1
fi

## The scan is started on the relative path "./", so it has to run from
## inside the source tree.
cd "$folder"

## Run the wrapper recursively on the current directory and capture what it
## writes to stdout.
##
## Not scanned:
## - binary files (--binary-files=without-match)
## - the .git directory
## - files named control.authcookie, changelog or changelog.upstream
## - files named LICENSE or lkrg-openrc.sh. These are excluded because a
##   real name contains a special character (presumably a non-ASCII
##   character in a person's name - TODO confirm).
##   https://github.com/grml/grml-debootstrap/issues/219
##
## XXX: This is clearly a non-ideal solution but fixing this is an issue for
##      the whole Free and Open Source community. See also:
##      https://forums.whonix.org/t/detecting-malicious-unicode-in-source-code-and-pull-requests/13754
scan_args=(
   --recursive
   --binary-files=without-match
   --exclude=control.authcookie
   --exclude=LICENSE
   --exclude=lkrg-openrc.sh
   --exclude=changelog
   --exclude=changelog.upstream
   --exclude-dir=.git
   --
   "./"
)

## The exit status is overridden with '|| true' because `grep` exits non-zero
## if no match was found, which must not abort the script under errexit.
## NOTE(review): this also hides any other failure of the wrapper - confirm
## that this is acceptable.
grep_find_unicode_wrapper_output="$("$wrapper" "${scan_args[@]}")" || true

## Whitelist of scan results that are not reported.
##
## Every entry is an extended regular expression (it is later passed to
## `grep --extended-regexp`) that is searched for anywhere in a line of the
## scan output. It is NOT a shell glob. Therefore:
## - The leading '.' is escaped so that it only matches a literal "./".
##   Unescaped, it matches any character followed by '/', which would also
##   whitelist the same relative path nested below some other directory.
## - Directory entries end in '/.*'. The glob-style '/*' means "zero or more
##   slashes" as a regex and would also match sibling names that merely
##   share the prefix.
## NOTE(review): entries are not anchored with '^' because the exact line
## format of the scan output is not visible here - confirm it and anchor the
## entries if possible.
whitelist_list=(
   '\./live-build/debian/control'
   '\./live-build/debian/copyright'
   '\./live-build/manpages/po/fr/.*'
   '\./live-build/manpages/fr/.*'
   '\./live-build/manpages/ja/.*'
   '\./live-build/manpages/po/ja/.*'
   '\./packages/kicksecure/kicksecure-base-files/debian/copyright'
)

## Join all entries into a single alternation: (entry1|entry2|...).
## "${array[*]}" joins with the first character of IFS; IFS is only changed
## inside the command substitution subshell.
whitelist_pattern="($(IFS='|'; printf '%s' "${whitelist_list[*]}"))"

## Keep only those scan result lines that match none of the whitelist
## entries. `grep` exits non-zero when it selects no line (i.e. everything
## was whitelisted or there was nothing to filter), hence the '|| true'.
filtered_output="$(
   echo "$grep_find_unicode_wrapper_output" \
      | grep --extended-regexp --invert-match -- "$whitelist_pattern"
)" || true

## Report the outcome. Any scan result that survived the whitelist filter is
## written to stderr and the script exits with status 1.
if [ -n "$filtered_output" ]; then
   echo "$filtered_output" >&2

   ## `true` prints nothing; this message is only visible with xtrace.
   true "\
$0: ERROR: Unicode found!

See also:
https://forums.whonix.org/t/detecting-malicious-unicode-in-source-code-and-pull-requests/13754
"
   exit 1
fi

## Nothing left after filtering.
true "INFO: No Unicode issues found, everything is OK."

true "$0: END."
