# videocut/strip_suffix.py
#
# 129 lines
# 4.0 KiB
# Python
# Raw Blame History
#
# This file contains invisible Unicode characters
#
# This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import sys
import glob
import unicodedata
def _is_strippable_prefix_char(ch):
    """Decide whether one character counts as leading filename decoration.

    A character is strippable when it is whitespace, a variation selector
    (U+FE00..U+FE0F), the zero-width joiner (U+200D), falls inside one of
    the symbol/emoji code-point blocks listed below, or has the Unicode
    general category So, Sk, Sm or Cf.

    Note: the category test also accepts plain ASCII symbols such as
    '+', '=', '~' and '^', so those are treated as strippable as well.

    Args:
        ch: A one-character string.

    Returns:
        True if the character should be dropped from the start of a
        filename, False otherwise.
    """
    # Whitespace is answered first, before the code point is computed.
    if ch.isspace():
        return True

    code_point = ord(ch)

    # Emoji "glue": variation selectors (e.g. U+FE0F) and the zero-width
    # joiner used to build composite emoji sequences.
    is_variation_selector = 0xFE00 <= code_point <= 0xFE0F
    if is_variation_selector or code_point == 0x200D:
        return True

    # Inclusive (first, last) code-point blocks holding emoji and symbols.
    symbol_blocks = (
        (0x2300, 0x23FF),    # Miscellaneous Technical (e.g. U+23F0 alarm clock)
        (0x2460, 0x24FF),    # Enclosed Alphanumerics
        (0x2500, 0x257F),    # Box Drawing
        (0x2580, 0x259F),    # Block Elements
        (0x25A0, 0x25FF),    # Geometric Shapes (e.g. U+25B6 play triangle)
        (0x2600, 0x26FF),    # Miscellaneous Symbols (e.g. U+26AB, U+2600)
        (0x2700, 0x27BF),    # Dingbats (e.g. U+2705, U+2728, U+2764)
        (0x2B00, 0x2BFF),    # Miscellaneous Symbols and Arrows
        (0x1F000, 0x1FFFF),  # Supplementary symbol and emoji blocks
    )
    if any(first <= code_point <= last for first, last in symbol_blocks):
        return True

    # Fallback on the general category: other symbols (So), modifier
    # symbols (Sk), math symbols (Sm) and format characters (Cf).
    return unicodedata.category(ch) in ('So', 'Sk', 'Sm', 'Cf')
def strip_emoji_prefix(name):
    """Return name without its leading run of emoji/symbol/whitespace.

    Characters are examined from the left with
    _is_strippable_prefix_char; everything before the first character
    that is not strippable is discarded. If every character is
    strippable, the result is empty.
    """
    # Index of the first character to keep, or len(name) if there is none.
    keep_from = next(
        (
            position
            for position, char in enumerate(name)
            if not _is_strippable_prefix_char(char)
        ),
        len(name),
    )
    return name[keep_from:]
def strip_duplicate_suffix(filename):
    """Return filename with duplicate-copy suffixes and emoji prefixes removed.

    The extension (as split off by os.path.splitext) is preserved. The
    following steps are applied to the stem, in order:

    1. a trailing OS-style copy counter such as ' (2)' is removed;
    2. a trailing ' - Join' is removed;
    3. leading emoji / symbol / whitespace characters are removed.

    A step whose result would be an empty stem is skipped, so a name such
    as '❤.mp4' or ' (2).mp4' is never reduced to the bare extension
    '.mp4'.

    Args:
        filename: A file name (not a full path), with or without extension.

    Returns:
        The cleaned file name; identical to the input when nothing applies.

    Examples:
        'video (2).mp4'     -> 'video.mp4'
        'video (3).mp4'     -> 'video.mp4'
        'video - Join.mp4'  -> 'video.mp4'
        '⚫️“啊~爸”.mp4'      -> '“啊~爸”.mp4'
        'hello.mp4'         -> 'hello.mp4'
        '❤.mp4'             -> '❤.mp4'   (stem would become empty)
    """
    stem, ext = os.path.splitext(filename)

    cleaned = stem
    # Suffix rules, applied once each in this order.
    for pattern in (r'\s+\(\d+\)$', r'\s+-\s+Join$'):
        candidate = re.sub(pattern, '', cleaned)
        # Keep the previous value if stripping would leave nothing.
        if candidate:
            cleaned = candidate

    candidate = strip_emoji_prefix(cleaned)
    # A stem made only of emoji/symbols is left as it is.
    if candidate:
        cleaned = candidate

    return cleaned + ext
def _collect_target_files(args):
    """Expand the given paths into a list of absolute file paths.

    A path naming a file is taken as is; a path naming a directory
    contributes the regular files directly inside it (sub-directories are
    not descended into). A warning is printed for any other path.
    """
    target_files = []
    for arg in args:
        abs_arg = os.path.abspath(arg)
        if os.path.isfile(abs_arg):
            target_files.append(abs_arg)
        elif os.path.isdir(abs_arg):
            for entry in os.listdir(abs_arg):
                full_path = os.path.join(abs_arg, entry)
                if os.path.isfile(full_path):
                    target_files.append(full_path)
        else:
            print(f"Warning: '{arg}' does not exist.")
    return target_files


def main():
    """Rename the files named on the command line to their cleaned names.

    Every argument may be a file or a directory. Each collected file whose
    name is changed by strip_duplicate_suffix is renamed in place. A file
    is skipped, and counted as skipped, when the cleaned name already
    exists or when the rename itself fails with an OSError; one failing
    file does not stop the rest of the batch.
    """
    # 1. Check arguments
    if len(sys.argv) < 2:
        print("Usage: python strip_suffix.py <path_to_file_or_folder> [more_files...]")
        return

    # 2. Parse arguments: drop surrounding spaces and quote characters.
    args = [arg.strip(' "\'') for arg in sys.argv[1:]]
    target_files = _collect_target_files(args)

    if not target_files:
        print("Error: No valid files found to process.")
        return

    rename_count = 0
    skip_count = 0
    print("-" * 50)

    for filepath in target_files:
        directory = os.path.dirname(filepath)
        filename = os.path.basename(filepath)
        clean_name = strip_duplicate_suffix(filename)
        if clean_name == filename:
            continue

        clean_path = os.path.join(directory, clean_name)
        # Never overwrite an existing file with the cleaned name.
        if os.path.exists(clean_path):
            print(f" SKIP: '{filename}' -> '{clean_name}' (target already exists)")
            skip_count += 1
            continue

        try:
            os.rename(filepath, clean_path)
        except OSError as exc:
            # Report the failure and keep going with the remaining files.
            print(f" SKIP: '{filename}' -> '{clean_name}' ({exc})")
            skip_count += 1
            continue

        print(f" RENAMED: '{filename}' -> '{clean_name}'")
        rename_count += 1

    print("-" * 50)
    # Only claim that nothing matched when nothing was skipped either.
    if rename_count == 0 and skip_count == 0:
        print("No files with duplicate suffixes found.")
    else:
        print(f"Renamed {rename_count} file(s). Skipped {skip_count} file(s).")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()