feat: [v1.6] Improve handling of duplicate filenames
This commit is contained in:
parent 9e3cb6ff64
commit b042805d46
@@ -1,5 +1,5 @@
-docker build -t 192.168.2.212:3000/tigeren/metube:1.5 .
+docker build -t 192.168.2.212:3000/tigeren/metube:1.6 .
 
-docker push 192.168.2.212:3000/tigeren/metube:1.5
+docker push 192.168.2.212:3000/tigeren/metube:1.6
 
 docker compose up -d --build --force-recreate
app/ytdl.py (316 changed lines)
@@ -7,6 +7,8 @@ import asyncio
 import multiprocessing
 import logging
 import re
+import random
+import string
 from urllib.parse import urlparse
 
 import yt_dlp.networking.impersonate
@@ -71,11 +73,32 @@ class Download:
         self.proc = None
         self.loop = None
         self.notifier = None
+        self.had_download = False  # Track if actual download occurred
 
     def _download(self):
         log.info(f"Starting download for: {self.info.title} ({self.info.url})")
+        log.info(f"[TRACE] Download config: download_dir={self.download_dir}, temp_dir={self.temp_dir}")
+        log.info(f"[TRACE] Output template: {self.output_template}")
         try:
             def put_status(st):
+                # Log every status update to trace the flow
+                status_type = st.get('status', 'unknown')
+                if status_type == 'downloading':
+                    # Mark that we're actually downloading (not skipping)
+                    self.had_download = True
+                    if 'tmpfilename' in st:
+                        log.debug(f"[TRACE] Downloading - tmpfile: {st.get('tmpfilename')}")
+                elif status_type == 'finished':
+                    log.info(f"[TRACE] put_status FINISHED - filename: {st.get('filename')}, tmpfilename: {st.get('tmpfilename')}")
+                    log.info(f"[TRACE] had_download flag: {self.had_download}")
+                    if st.get('filename'):
+                        exists = os.path.exists(st['filename'])
+                        log.info(f"[TRACE] File exists at reported location? {exists}")
+                        if exists:
+                            log.info(f"[TRACE] File size: {os.path.getsize(st['filename'])} bytes")
+                elif status_type == 'error':
+                    log.error(f"[TRACE] put_status ERROR - msg: {st.get('msg')}")
+
                 self.status_queue.put({k: v for k, v in st.items() if k in (
                     'tmpfilename',
                     'filename',
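The had_download flag above piggybacks on yt-dlp's progress hooks: a hook only ever reports status 'downloading' while bytes are actually being transferred, so it cleanly separates real downloads from runs where yt-dlp reuses an existing file. A minimal standalone sketch of that technique (illustrative only, not the project's code; the URL is a placeholder):

import yt_dlp

had_download = False

def on_progress(st):
    global had_download
    if st['status'] == 'downloading':
        # fires only while bytes are actually being transferred
        had_download = True
    elif st['status'] == 'finished':
        print('finished; a real download occurred:', had_download)

with yt_dlp.YoutubeDL({'quiet': True, 'progress_hooks': [on_progress]}) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL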
@@ -89,12 +112,87 @@ class Download:
                 )})
 
             def put_status_postprocessor(d):
+                log.info(f"[TRACE] ===== POSTPROCESSOR CALLED =====")
+                log.info(f"[TRACE] Postprocessor: {d.get('postprocessor')}, Status: {d.get('status')}")
+
                 if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished':
+                    log.info(f"[TRACE] MoveFiles postprocessor triggered")
+                    log.info(f"[TRACE] had_download flag in postprocessor: {self.had_download}")
+                    log.info(f"[TRACE] info_dict keys: {list(d['info_dict'].keys())}")
+                    log.info(f"[TRACE] info_dict filepath: {d['info_dict'].get('filepath')}")
+                    log.info(f"[TRACE] info_dict __finaldir: {d['info_dict'].get('__finaldir')}")
+
                     if '__finaldir' in d['info_dict']:
                         filename = os.path.join(d['info_dict']['__finaldir'], os.path.basename(d['info_dict']['filepath']))
                     else:
                         filename = d['info_dict']['filepath']
-                    self.status_queue.put({'status': 'finished', 'filename': filename})
+
+                    log.info(f"[TRACE] Resolved filename: {filename}")
+                    log.info(f"[TRACE] File exists? {os.path.exists(filename)}")
+
+                    # List files in directory
+                    dir_name = os.path.dirname(filename)
+                    if os.path.isdir(dir_name):
+                        all_files = os.listdir(dir_name)
+                        log.info(f"[TRACE] Files in {dir_name}: {all_files}")
+
+                    # Check if file exists at expected location
+                    if os.path.exists(filename):
+                        log.info(f"[TRACE] File FOUND at expected location")
+
+                        # If yt-dlp didn't actually download (skipped), just report the existing file
+                        if not self.had_download:
+                            log.info(f"[TRACE] No actual download occurred - yt-dlp reused existing file")
+                            log.info(f"[TRACE] Sending status with existing filename: {filename}")
+                            self.status_queue.put({'status': 'finished', 'filename': filename})
+                        else:
+                            # Actual download happened - check for conflicts
+                            log.info(f"[TRACE] Actual download occurred - checking for conflicts")
+                            base_name = os.path.basename(filename)
+                            name, ext = os.path.splitext(base_name)
+
+                            # Look for other files with same base name (excluding current file)
+                            other_files = []
+                            if os.path.isdir(dir_name):
+                                for existing_file in os.listdir(dir_name):
+                                    if existing_file == base_name:
+                                        log.debug(f"[TRACE] Skipping current file: {existing_file}")
+                                        continue  # Skip the current file
+                                    existing_name, existing_ext = os.path.splitext(existing_file)
+                                    # Check for exact name match
+                                    if existing_ext == ext and existing_name == name:
+                                        log.info(f"[TRACE] Found matching file: {existing_file}")
+                                        other_files.append(existing_file)
+
+                            log.info(f"[TRACE] Found {len(other_files)} other files with same base name: {other_files}")
+
+                            # If other files exist with same name, we have a duplicate - rename the NEW file
+                            if len(other_files) > 0:
+                                log.info(f"[TRACE] CONFLICT DETECTED! Other files: {other_files}")
+                                unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
+                                new_filename = f"{name}_{unique_id}{ext}"
+                                new_filepath = os.path.join(dir_name, new_filename)
+
+                                log.info(f"[TRACE] Attempting rename: {filename} -> {new_filepath}")
+                                try:
+                                    os.rename(filename, new_filepath)
+                                    log.warning(f"Filename conflict detected. Renamed: {base_name} → {new_filename}")
+                                    log.info(f"[TRACE] Rename successful")
+                                    filename = new_filepath
+                                except Exception as e:
+                                    log.error(f"[TRACE] Rename FAILED: {e}")
+                                    log.error(f"Failed to rename file due to conflict: {e}")
+                            else:
+                                log.info(f"[TRACE] No conflict - this is the only file with this name")
+
+                            log.info(f"[TRACE] Sending status with filename: {filename}")
+                            self.status_queue.put({'status': 'finished', 'filename': filename})
+                    else:
+                        log.info(f"[TRACE] File NOT FOUND at expected location")
+                        base_name = os.path.basename(filename)
+                        self.status_queue.put({'status': 'error', 'msg': f'File not found: {base_name}'})
+                else:
+                    log.debug(f"[TRACE] Other postprocessor: {d.get('postprocessor')}")
 
             ret = yt_dlp.YoutubeDL(params={
                 'quiet': True,
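The conflict check above runs inside a yt-dlp postprocessor hook: yt-dlp calls it for every postprocessor step, and the MoveFiles step is the one that moves the file out of the temp directory, so it is the first point where the final path is known. A minimal sketch of registering such a hook (illustrative only; the URL is a placeholder):

import yt_dlp

def on_postprocess(d):
    # called for every postprocessor; MoveFiles knows the final location
    if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished':
        print('final path:', d['info_dict'].get('filepath'))

with yt_dlp.YoutubeDL({'quiet': True, 'postprocessor_hooks': [on_postprocess]}) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL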
@@ -128,6 +226,22 @@ class Download:
         asyncio.create_task(self.update_status())
         return await self.loop.run_in_executor(None, self.proc.join)
 
+    def _resolve_filename_conflict(self, filepath):
+        """
+        Resolve filename conflicts by appending a short unique ID.
+        Returns the final non-conflicting filepath.
+        """
+        dir_name = os.path.dirname(filepath)
+        base_name = os.path.basename(filepath)
+        name, ext = os.path.splitext(base_name)
+
+        # Generate a short unique ID (5 alphanumeric characters)
+        unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
+        new_filename = f"{name}_{unique_id}{ext}"
+        new_filepath = os.path.join(dir_name, new_filename)
+
+        return new_filepath
+
     def cancel(self):
         log.info(f"Cancelling download: {self.info.title}")
         if self.running():
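Note that the new helper only constructs a candidate path; it neither checks for existence nor performs the rename itself, so callers decide when a conflict actually exists. With 5 characters drawn from a 36-symbol alphabet there are 36^5 = 60,466,176 possible suffixes, so accidental collisions are unlikely but not impossible. A hypothetical call (the path is made up for illustration):

# download is a Download instance; the input path is illustrative
new_path = download._resolve_filename_conflict('/downloads/My Clip.mp4')
# -> '/downloads/My Clip_k3x9q.mp4' (random 5-character suffix)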
@@ -250,6 +364,11 @@ class DownloadQueue:
         elif self.config.DOWNLOAD_MODE == 'limited':
             self.semaphore = asyncio.Semaphore(int(self.config.MAX_CONCURRENT_DOWNLOADS))
 
+        # PreCheck queue for sequential conflict detection (no locks needed)
+        self.precheck_queue = asyncio.Queue()
+        self.reserved_filenames = set()  # Track filenames being processed
+        self.precheck_in_progress = {}  # Track URL -> DownloadInfo for items in precheck queue
+
         self.done.load()
 
     async def __import_queue(self):
@@ -262,8 +381,153 @@ class DownloadQueue:
 
     async def initialize(self):
         log.info("Initializing DownloadQueue")
+        # Start the precheck worker for sequential conflict detection
+        asyncio.create_task(self.__precheck_worker())
         asyncio.create_task(self.__import_queue())
         asyncio.create_task(self.__import_pending())
 
+    async def __precheck_worker(self):
+        """Background worker that processes the precheck queue sequentially.
+        Sequential processing naturally prevents race conditions without locks."""
+        log.info("[PreCheck] Worker started")
+        while True:
+            try:
+                # Get next item from the queue (blocks if empty)
+                item = await self.precheck_queue.get()
+                log.debug(f"[PreCheck] Processing item: {item['dl'].url}")
+
+                # Process the precheck and start the download
+                await self.__process_precheck(item)
+
+                # Mark the task as done
+                self.precheck_queue.task_done()
+            except Exception as e:
+                log.error(f"[PreCheck] Worker error: {e}", exc_info=True)
+
+    async def __process_precheck(self, item):
+        """Process a single download with conflict detection.
+        Called sequentially by the worker - no race conditions possible."""
+        dl = item['dl']
+        auto_start = item['auto_start']
+        dldirectory = item['dldirectory']
+        output = item['output']
+        output_chapter = item['output_chapter']
+        ytdl_options = item['ytdl_options']
+        entry = item['entry']
+
+        log.info(f"[PreCheck] Checking for filename conflicts before download")
+        log.debug(f"[PreCheck] Original output template: {output}")
+
+        # Try to predict the filename that yt-dlp will generate
+        if entry and 'title' in entry:
+            # Check if we have the real title or just a placeholder
+            title = entry.get('title', '')
+            video_id = entry.get('id', '')
+
+            # If the title looks like a placeholder (contains the ID), we need full extraction
+            needs_full_extraction = (
+                not title or  # No title
+                title == f"twitter video #{video_id}" or  # Placeholder pattern
+                video_id in title  # ID is in title (likely placeholder)
+            )
+
+            if needs_full_extraction:
+                log.debug(f"[PreCheck] Title appears to be placeholder: '{title}', doing full info extraction")
+                try:
+                    # Do a full (non-flat) extraction to get the real title
+                    full_entry = await asyncio.get_running_loop().run_in_executor(
+                        None,
+                        lambda: yt_dlp.YoutubeDL(params={
+                            'quiet': True,
+                            'no_color': True,
+                            'extract_flat': False,  # Full extraction
+                            'skip_download': True,  # Don't download, just get info
+                            'paths': {"home": dldirectory, "temp": self.config.TEMP_DIR},
+                            **ytdl_options,
+                        }).extract_info(dl.url, download=False)
+                    )
+                    if full_entry and 'title' in full_entry:
+                        title = full_entry['title']
+                        log.debug(f"[PreCheck] Got real title from full extraction: '{title}'")
+                except Exception as e:
+                    log.warning(f"[PreCheck] Failed to get full info: {e}, using placeholder title")
+
+            predicted_filename = output
+            # Replace title
+            if '%(title)s' in predicted_filename:
+                predicted_filename = predicted_filename.replace('%(title)s', title)
+
+            # Replace id
+            if '%(id)s' in predicted_filename and video_id:
+                predicted_filename = predicted_filename.replace('%(id)s', video_id)
+
+            # Handle ext specially - default to the format's extension if not in the entry
+            if '%(ext)s' in predicted_filename:
+                ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4')
+                predicted_filename = predicted_filename.replace('%(ext)s', ext)
+
+            predicted_filepath = os.path.join(dldirectory, predicted_filename)
+            log.info(f"[PreCheck] Predicted filepath: {predicted_filepath}")
+
+            # Check if the file already exists OR is reserved by another download in the queue
+            # Sequential processing means we check one at a time - no race condition
+            if os.path.exists(predicted_filepath) or predicted_filepath in self.reserved_filenames:
+                if predicted_filepath in self.reserved_filenames:
+                    log.warning(f"[PreCheck] Filename is reserved by pending download! Will append unique ID")
+                else:
+                    log.warning(f"[PreCheck] File already exists! Will append unique ID to avoid conflict")
+
+                # Generate unique ID
+                unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
+
+                # Modify the output template to include the unique ID before the extension
+                # Change "%(title)s.%(ext)s" to "%(title)s_XXXXX.%(ext)s"
+                if '.%(ext)s' in output:
+                    output = output.replace('.%(ext)s', f'_{unique_id}.%(ext)s')
+                else:
+                    # Fallback: append to the end
+                    output = f"{output}_{unique_id}"
+
+                # Re-predict the new filename
+                predicted_filename = output
+                if '%(title)s' in predicted_filename:
+                    predicted_filename = predicted_filename.replace('%(title)s', title)
+                if '%(id)s' in predicted_filename and video_id:
+                    predicted_filename = predicted_filename.replace('%(id)s', video_id)
+                if '%(ext)s' in predicted_filename:
+                    ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4')
+                    predicted_filename = predicted_filename.replace('%(ext)s', ext)
+                predicted_filepath = os.path.join(dldirectory, predicted_filename)
+
+                log.info(f"[PreCheck] Modified output template: {output}")
+                log.info(f"[PreCheck] New predicted filepath: {predicted_filepath}")
+            else:
+                log.info(f"[PreCheck] No conflict detected, using original template")
+
+            # Reserve this filename to prevent concurrent downloads from using it
+            self.reserved_filenames.add(predicted_filepath)
+            log.debug(f"[PreCheck] Reserved filename: {predicted_filepath}")
+        else:
+            predicted_filepath = None
+            log.debug(f"[PreCheck] No entry data available, skipping pre-check")
+
+        log.debug(f"final resolved output template: {output}")
+        download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl)
+
+        # Store the reserved filepath for cleanup
+        download.reserved_filepath = predicted_filepath
+
+        # Remove from in-progress tracking before adding to the queue
+        # This allows queue.exists() checks to work properly
+        if dl.url in self.precheck_in_progress:
+            del self.precheck_in_progress[dl.url]
+            log.debug(f"[PreCheck] Removed from in-progress tracking: {dl.url}")
+
+        if auto_start is True:
+            self.queue.put(download)
+            asyncio.create_task(self.__start_download(download))
+        else:
+            self.pending.put(download)
+
     async def __start_download(self, download):
         if download.canceled:
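The worker above relies on a standard asyncio pattern: a single consumer draining an asyncio.Queue serializes all conflict checks, so no explicit lock is needed. A self-contained sketch of that pattern (names are illustrative):

import asyncio

async def worker(q: asyncio.Queue) -> None:
    while True:
        item = await q.get()          # blocks until an item arrives
        try:
            print('checking', item)   # stand-in for __process_precheck(item)
        finally:
            q.task_done()             # pairs with q.join() below

async def main() -> None:
    q: asyncio.Queue = asyncio.Queue()
    asyncio.create_task(worker(q))    # one consumer, so checks run strictly in order
    for name in ('a.mp4', 'b.mp4'):
        await q.put(name)
    await q.join()                    # returns once both items are processed

asyncio.run(main())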
@@ -296,6 +560,12 @@ class DownloadQueue:
             self._post_download_cleanup(download)
 
     def _post_download_cleanup(self, download):
+        # Release the filename reservation if it exists
+        if hasattr(download, 'reserved_filepath') and download.reserved_filepath:
+            if download.reserved_filepath in self.reserved_filenames:
+                self.reserved_filenames.discard(download.reserved_filepath)
+                log.debug(f"[PreCheck] Released reservation for: {download.reserved_filepath}")
+
         if download.info.status != 'finished':
             if download.tmpfilename and os.path.isfile(download.tmpfilename):
                 try:
@@ -342,18 +612,32 @@ class DownloadQueue:
         return dldirectory, None
 
     async def __add_download(self, dl, auto_start):
+        """Fast path: validate and queue for precheck processing.
+        Returns immediately without blocking on slow operations."""
+        # Check if this exact URL is already being processed, in the queue, or already downloaded
+        # This prevents duplicate downloads when the same URL is submitted multiple times
+        if (dl.url in self.precheck_in_progress or
+                self.queue.exists(dl.url) or
+                self.pending.exists(dl.url) or
+                self.done.exists(dl.url)):
+            log.info(f"[PreCheck] URL already queued/processing/downloaded, skipping: {dl.url}")
+            return {'status': 'ok', 'msg': 'Download already exists'}
+
         dldirectory, error_message = self.__calc_download_path(dl.quality, dl.format, dl.folder)
         if error_message is not None:
             return error_message
 
         output = self.config.OUTPUT_TEMPLATE if len(dl.custom_name_prefix) == 0 else f'{dl.custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}'
         output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER
         entry = getattr(dl, 'entry', None)
 
         if entry is not None and 'playlist' in entry and entry['playlist'] is not None:
             if len(self.config.OUTPUT_TEMPLATE_PLAYLIST):
                 output = self.config.OUTPUT_TEMPLATE_PLAYLIST
             for property, value in entry.items():
                 if property.startswith("playlist"):
                     output = output.replace(f"%({property})s", str(value))
 
         ytdl_options = dict(self.config.YTDL_OPTIONS)
         playlist_item_limit = getattr(dl, 'playlist_item_limit', 0)
         if playlist_item_limit > 0:
@@ -404,12 +688,23 @@ class DownloadQueue:
         else:
             log.debug(f"[Cookie] Cookies directory does not exist")
 
-        download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl)
-        if auto_start is True:
-            self.queue.put(download)
-            asyncio.create_task(self.__start_download(download))
-        else:
-            self.pending.put(download)
+        # Mark the URL as being processed to prevent duplicates
+        # Store the DownloadInfo so we can display it in the UI
+        self.precheck_in_progress[dl.url] = dl
+
+        # Queue for sequential precheck processing (fast, non-blocking)
+        await self.precheck_queue.put({
+            'dl': dl,
+            'auto_start': auto_start,
+            'dldirectory': dldirectory,
+            'output': output,
+            'output_chapter': output_chapter,
+            'ytdl_options': ytdl_options,
+            'entry': entry,
+        })
+        log.debug(f"[PreCheck] Queued for processing: {dl.url}")
+
+        # Notify immediately (fast response to the user)
         await self.notifier.added(dl)
 
     async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already):
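On the producer side, __add_download now does only cheap work: a dictionary lookup rejects duplicate URLs, and the payload is handed to the precheck queue so the request handler can return immediately. A stripped-down sketch of that fast path (names are illustrative, not the project's API):

import asyncio

in_progress: dict = {}                # mirrors precheck_in_progress
q: asyncio.Queue = asyncio.Queue()    # mirrors precheck_queue

async def add(url: str) -> dict:
    if url in in_progress:            # O(1) duplicate check, no blocking I/O
        return {'status': 'ok', 'msg': 'Download already exists'}
    in_progress[url] = {'url': url}   # kept so the UI can show a 'preparing' row
    await q.put(in_progress[url])     # slow filename prediction happens later
    return {'status': 'ok'}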
@@ -575,6 +870,11 @@ class DownloadQueue:
             else:
                 v.info.file_exists = False
 
-        return (list((k, v.info) for k, v in self.queue.items()) +
+        # Create a list from items in the precheck queue
+        # These items have 'preparing' status to indicate they're being analyzed
+        precheck_list = [(dl.url, dl) for dl in self.precheck_in_progress.values()]
+
+        return (precheck_list +
+                list((k, v.info) for k, v in self.queue.items()) +
                 list((k, v.info) for k, v in self.pending.items()),
                 list((k, v.info) for k, v in self.done.items()))