#!/usr/bin/python3 -su

## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

"""
Writes individual test files, each containing a suspicious Unicode character
that might be missed by basic scanners. Characters include ASCII control
characters and invisible or formatting Unicode characters.

Each character is specified in this script by its numeric codepoint (never as
a literal character) and written into a separate file named 1.txt, 2.txt, etc.
inside the directory passed as the first command-line argument.

Most likely folder:
    $HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test

Example file:
    $HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test/1.txt
"""

import os
import sys
import unicodedata

def write_test_files(base_dir):
    """Write one text file per suspicious codepoint into *base_dir*.

    The directory is created if it does not exist yet (including missing
    parents). Files are named ``1.txt``, ``2.txt``, ... following the order of
    the codepoint table below. Each file holds a single line with the index,
    the decimal value, the ``U+XXXX`` notation, the Unicode name (or
    ``UNKNOWN CHARACTER`` when the Unicode database has no name for it) and
    the raw character itself, terminated by a newline.

    The path of every file is printed to stdout before it is written. A
    ``UnicodeEncodeError`` raised while printing or writing is reported on
    stdout and the loop continues with the next codepoint.
    """
    # Codepoints are given numerically so that this source file never
    # contains the suspicious characters themselves.
    suspicious_codepoints = (
        0x0001,  # START OF HEADING
        0x0007,  # BELL
        0x000B,  # VERTICAL TAB
        0x001B,  # ESCAPE
        0x007F,  # DELETE
        0x00AD,  # SOFT HYPHEN
        0x200B,  # ZERO WIDTH SPACE
        0x200E,  # LEFT-TO-RIGHT MARK
        0x200F,  # RIGHT-TO-LEFT MARK
        0x2060,  # WORD JOINER
        0xFEFF,  # ZERO WIDTH NO-BREAK SPACE (BOM)
    )

    os.makedirs(base_dir, exist_ok=True)

    for index, codepoint in enumerate(suspicious_codepoints, start=1):
        symbol = chr(codepoint)
        notation = f"U+{codepoint:04X}"
        # Control characters have no entry in the Unicode name database, so
        # fall back to a placeholder instead of raising ValueError.
        description = unicodedata.name(symbol, "UNKNOWN CHARACTER")
        record = f"Index: {index} | Decimal: {codepoint} | Codepoint: {notation} | Name: {description} | Character: {symbol}"
        destination = os.path.join(base_dir, f"{index}.txt")
        try:
            print(destination)
            with open(destination, 'w', encoding='utf-8') as handle:
                handle.write(record + "\n")
        except UnicodeEncodeError as exc:
            print(f"Error encoding U+{codepoint:04X}: {exc}")

if __name__ == "__main__":
    # The output directory is a mandatory first argument. Without this check
    # a missing argument would surface as an IndexError traceback, and an
    # empty one as a FileNotFoundError from os.makedirs.
    if len(sys.argv) < 2 or not sys.argv[1]:
        print(f"Usage: {sys.argv[0]} TARGET_DIR", file=sys.stderr)
        sys.exit(2)
    # Any further arguments are ignored.
    target_dir = sys.argv[1]
    write_test_files(target_dir)
