# videocut/strip_suffix.py

import os
import re
import sys
import glob
import unicodedata


def _is_strippable_prefix_char(ch):
    """Return True if ch is an emoji/symbol/whitespace character that should
    be stripped when it appears at the start of a filename.
    """
    if ch.isspace():
        return True
    cp = ord(ch)
    # Variation selectors (e.g. U+FE0F after ❤) and zero-width joiner
    if 0xFE00 <= cp <= 0xFE0F or cp == 0x200D:
        return True
    # Common emoji / pictographic / symbol blocks
    emoji_ranges = (
        (0x2300, 0x23FF),    # Misc Technical (⏰ etc.)
        (0x2460, 0x24FF),    # Enclosed Alphanumerics
        (0x2500, 0x257F),    # Box Drawing
        (0x2580, 0x259F),    # Block Elements
        (0x25A0, 0x25FF),    # Geometric Shapes (⚫ is 0x26AB, but ▶ here)
        (0x2600, 0x26FF),    # Misc Symbols (⚫ ❤ ☀ ...)
        (0x2700, 0x27BF),    # Dingbats (✅ ✨ ...)
        (0x2B00, 0x2BFF),    # Misc Symbols and Arrows
        (0x1F000, 0x1FFFF),  # Supplementary symbols & emoji planes
    )
    for lo, hi in emoji_ranges:
        if lo <= cp <= hi:
            return True
    # Unicode general categories: Symbol (So/Sk/Sm) and format chars
    cat = unicodedata.category(ch)
    if cat in ('So', 'Sk', 'Sm', 'Cf'):
        return True
    return False


def strip_emoji_prefix(name):
    """Return name without its leading run of strippable characters.

    Characters are dropped from the front for as long as
    _is_strippable_prefix_char() accepts them; the remainder is returned
    untouched. A name made up solely of such characters yields ''.
    """
    for index, char in enumerate(name):
        if not _is_strippable_prefix_char(char):
            # First character worth keeping: everything from here on stays.
            return name[index:]
    return ''


def strip_duplicate_suffix(filename):
    """Strip unwanted prefixes/suffixes from a filename.

    Suffixes removed (from the part before the extension): OS-generated
    ' (2)', ' (3)', ... and trailing ' - Join'. Any trailing combination of
    these is removed in one go, so the result does not change when the
    function is applied again.
    Prefixes removed: leading emoji / special symbols (e.g. ⚫️, ❤️, ✅),
    as decided by strip_emoji_prefix().

    If cleaning would leave nothing before the extension (for example the
    whole stem is emoji), the filename is returned unchanged rather than
    producing an extension-only name such as '.mp4'.

    Args:
        filename: A bare filename (no directory part), with or without an
            extension.

    Returns:
        The cleaned filename, or the original filename when cleaning would
        empty the stem.

    Examples:
        'video (2).mp4'         -> 'video.mp4'
        'video (3).mp4'         -> 'video.mp4'
        'video - Join.mp4'      -> 'video.mp4'
        'video (2) - Join.mp4'  -> 'video.mp4'
        'video (2) (3).mp4'     -> 'video.mp4'
        '⚫️“啊~爸”.mp4'         -> '“啊~爸”.mp4'
        '❤️hello.mp4'           -> 'hello.mp4'
        '❤️.mp4'                -> '❤️.mp4'   (stem would become empty)
        'video.mp4'             -> 'video.mp4'
    """
    name, ext = os.path.splitext(filename)

    # One pattern covers every trailing mix of ' (N)' and ' - Join', so the
    # outcome no longer depends on the order in which the suffixes appear.
    cleaned = re.sub(r'(?:\s+\(\d+\)|\s+-\s+Join)+$', '', name)
    cleaned = strip_emoji_prefix(cleaned)

    if not cleaned:
        # Nothing meaningful would remain; keep the original name so callers
        # see "no change" instead of a hidden, extension-only filename.
        return filename
    return cleaned + ext


def main():
    """Rename files given on the command line to their cleaned names.

    Each argument in sys.argv[1:] may be a file or a directory; for a
    directory, the regular files directly inside it are processed (no
    recursion). Every file whose name is changed by
    strip_duplicate_suffix() is renamed in place, unless the target name
    already exists, in which case it is skipped. Progress and a final
    summary are printed to stdout.

    A failing rename (OSError) is reported and counted instead of aborting
    the remaining files.
    """
    # 1. Check arguments
    if len(sys.argv) < 2:
        print("Usage: python strip_suffix.py <path_to_file_or_folder> [more_files...]")
        return

    # 2. Parse arguments: drop stray spaces/quotes left around the paths
    args = [arg.strip(' "\'') for arg in sys.argv[1:]]

    target_files = []
    for arg in args:
        abs_arg = os.path.abspath(arg)
        if os.path.isfile(abs_arg):
            target_files.append(abs_arg)
        elif os.path.isdir(abs_arg):
            # Collect the regular files directly inside the directory
            for entry in os.listdir(abs_arg):
                full_path = os.path.join(abs_arg, entry)
                if os.path.isfile(full_path):
                    target_files.append(full_path)
        else:
            print(f"Warning: '{arg}' does not exist.")

    # A file can be listed both directly and via its directory; process each
    # path once (order preserved) so it is not renamed a second time.
    target_files = list(dict.fromkeys(target_files))

    if not target_files:
        print("Error: No valid files found to process.")
        return

    rename_count = 0
    skip_count = 0
    fail_count = 0

    print("-" * 50)

    for filepath in target_files:
        directory, filename = os.path.split(filepath)
        clean_name = strip_duplicate_suffix(filename)
        if clean_name == filename:
            continue

        clean_path = os.path.join(directory, clean_name)

        # Never overwrite an existing file with the cleaned name
        if os.path.exists(clean_path):
            print(f"  SKIP: '{filename}' -> '{clean_name}' (target already exists)")
            skip_count += 1
            continue

        try:
            os.rename(filepath, clean_path)
        except OSError as exc:
            # e.g. permission denied or the file vanished; keep going
            print(f"  FAILED: '{filename}' -> '{clean_name}' ({exc})")
            fail_count += 1
            continue

        print(f"  RENAMED: '{filename}' -> '{clean_name}'")
        rename_count += 1

    print("-" * 50)
    if rename_count == 0 and skip_count == 0 and fail_count == 0:
        print("No files with duplicate suffixes found.")
    else:
        summary = f"Renamed {rename_count} file(s). Skipped {skip_count} file(s)."
        if fail_count:
            summary += f" Failed {fail_count} file(s)."
        print(summary)


# Run the renamer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()