#!/bin/bash

## Copyright (C) 2022 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## https://www.kicksecure.com/wiki/Unicode#grep-find-unicode-wrapper

set -o errexit
set -o nounset
set -o errtrace
set -o pipefail

## Evaluate the exit code of a finished 'grep' run.
## Arguments: $1 - exit code reported by 'grep'
## Outputs:   an error message on stderr for exit codes other than 0 and 1
## Returns:   0 for the exit codes 0 (match) and 1 (no match).
##            Any other exit code terminates the script with that same code.
check_grep_status() {
  local grep_exit_code
  grep_exit_code="$1"

  case "$grep_exit_code" in
    0)
      ## 'true' ignores its arguments. The text is only visible in xtrace
      ## ('set -x') output.
      true "$0: INFO: Match."
      ;;
    1)
      true "$0: INFO: No match."
      ;;
    *)
      printf '%s\n' "$0: ERROR: grep (syntax?) error! Exiting with code '$grep_exit_code'." >&2
      exit "$grep_exit_code"
      ;;
  esac
}

## Verify that the external tools used by this script are available.
##
## A bare 'command -v' under errexit would end the script silently with exit
## code 1. This script also uses exit code 1 for "no match", so a missing tool
## would be indistinguishable from a clean scan result. Therefore name the
## missing tool on stderr and exit with code 2, the same way 'grep' reports
## errors using an exit code greater than 1.
for required_command in stecho sort; do
  if ! command -v "$required_command" >/dev/null; then
    printf '%s\n' "$0: ERROR: Required command '$required_command' not found." >&2
    exit 2
  fi
done

## end-of-options ("--"):
## End-of-options is intentionally not used, because this is a wrapper
## around 'grep' and the user is supposed to inject their own command line
## options such as for example '--recursive'. It remains the responsibility of
## the user to use end-of-options.

## Options that are passed to every 'grep' invocation of this script.
grep_args=()
grep_args+=( --files-with-matches )
grep_args+=( --line-number )
## --binary-files=text - required to find backspace and null character
grep_args+=( --binary-files=text )

## 'grep' exits with code 1 if nothing matched, which is an expected outcome.
## Therefore errexit is suspended while the scans run and each exit code is
## inspected explicitly using 'check_grep_status'.
set +o errexit

## Scan one: any byte outside of the 7-bit ASCII range.
one="$(LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[^\x00-\x7F]' "$@")"
check_grep_status "$?"

## Scan two: same intent as scan one, expressed using the '[:ascii:]' class.
two="$(LC_ALL=C grep "${grep_args[@]}" --perl-regexp "[^[:ascii:]]" "$@")"
check_grep_status "$?"

## Scan three: Unicode bidirectional control characters.
## https://access.redhat.com/security/vulnerabilities/RHSB-2021-007
## https://lintian.debian.org/tags/unicode-trojan
##
## '--perl-regexp':
## Not using 'grep's '--perl-regexp' option for three.
## Because not mentioned in above links and can lead to the following error message:
# grep: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
##
## The characters are written as explicit UTF-8 bytes ('\xHH') instead of
## '\uHHHH'. '\uHHHH' is expanded by the shell according to the locale of the
## shell itself. The 'LC_ALL=C' prefix below only affects 'grep'. In a
## non-UTF-8 locale such as C/POSIX, bash may emit the literal text '\u061C'
## and so on, which would turn the bracket expression into a set of ASCII
## characters and report nearly every file.
##
## NOTE: Because 'grep' runs with 'LC_ALL=C', the bracket expression is a set
## of single bytes. Scan three therefore matches any of these bytes, not only
## the complete sequences. Every such byte is non-ASCII and hence also
## reported by scan one.
bidi_bytes=""
bidi_bytes+=$'\xD8\x9C'      ## U+061C
bidi_bytes+=$'\xE2\x80\x8E'  ## U+200E
bidi_bytes+=$'\xE2\x80\x8F'  ## U+200F
bidi_bytes+=$'\xE2\x80\xAA'  ## U+202A
bidi_bytes+=$'\xE2\x80\xAB'  ## U+202B
bidi_bytes+=$'\xE2\x80\xAC'  ## U+202C
bidi_bytes+=$'\xE2\x80\xAD'  ## U+202D
bidi_bytes+=$'\xE2\x80\xAE'  ## U+202E
bidi_bytes+=$'\xE2\x81\xA6'  ## U+2066
bidi_bytes+=$'\xE2\x81\xA7'  ## U+2067
bidi_bytes+=$'\xE2\x81\xA8'  ## U+2068
bidi_bytes+=$'\xE2\x81\xA9'  ## U+2069
three="$(LC_ALL=C grep "${grep_args[@]}" "[${bidi_bytes}]" "$@")"
check_grep_status "$?"

## Scan four: ASCII control characters.
## Tab (\x09) and line feed (\x0A) are not part of the set.
four="$(LC_ALL=C grep "${grep_args[@]}" --perl-regexp '[\x00-\x08\x0B\x0C\x0D\x0E-\x1F\x7F]' "$@")"
check_grep_status "$?"

set -o errexit

#result="\
#$one
#$two
#$three
#$four"
## Problem: Extraneous newline at the end.
#output_message="$(printf '%s' "$result" | sort --unique)"

## Merge the results of the individual scans into one sorted list without
## duplicates.
## Arguments: any number of strings; each may be empty or contain several lines
## Outputs:   the lines of all non-empty arguments on stdout, sorted, unique
## Returns:   0 unless 'printf' or 'sort' fails
##
## An 'if' is used instead of '[ -n "$x" ] && printf ...' on purpose. With the
## '&&' form an empty last result makes the list return 1. In combination with
## 'pipefail' and 'errexit' this aborted the script with exit code 1 before
## any output, even though other scans had found matches.
merge_unique_results() {
  local scan_result

  for scan_result in "$@"; do
    if [ -n "$scan_result" ]; then
      printf '%s\n' "$scan_result"
    fi
  done | sort --unique
}

output_message="$(merge_unique_results "$one" "$two" "$three" "$four")"

## Nothing collected means no matches found, therefore 'exit 1'.
## This is consistent with 'grep', which also exits non-zero if no match has been found.
[ -n "$output_message" ] || exit 1

## Print using 'stecho' in case the file names themselves contain unicode.
stecho "$output_message"

## Matches found, therefore 'exit 0'.
## This is consistent with 'grep', which also exits 0 if matches have been found.
