diff --git a/DEPLOY.md b/DEPLOY.md index f005682..d1953b6 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -1,5 +1,5 @@ -docker build -t 192.168.2.212:3000/tigeren/metube:1.5 . +docker build -t 192.168.2.212:3000/tigeren/metube:1.6 . -docker push 192.168.2.212:3000/tigeren/metube:1.5 +docker push 192.168.2.212:3000/tigeren/metube:1.6 docker compose up -d --build --force-recreate \ No newline at end of file diff --git a/app/ytdl.py b/app/ytdl.py index 5bd5ee7..ae2b286 100644 --- a/app/ytdl.py +++ b/app/ytdl.py @@ -7,6 +7,8 @@ import asyncio import multiprocessing import logging import re +import random +import string from urllib.parse import urlparse import yt_dlp.networking.impersonate @@ -71,11 +73,32 @@ class Download: self.proc = None self.loop = None self.notifier = None + self.had_download = False # Track if actual download occurred def _download(self): log.info(f"Starting download for: {self.info.title} ({self.info.url})") + log.info(f"[TRACE] Download config: download_dir={self.download_dir}, temp_dir={self.temp_dir}") + log.info(f"[TRACE] Output template: {self.output_template}") try: def put_status(st): + # Log every status update to trace the flow + status_type = st.get('status', 'unknown') + if status_type == 'downloading': + # Mark that we're actually downloading (not skipping) + self.had_download = True + if 'tmpfilename' in st: + log.debug(f"[TRACE] Downloading - tmpfile: {st.get('tmpfilename')}") + elif status_type == 'finished': + log.info(f"[TRACE] put_status FINISHED - filename: {st.get('filename')}, tmpfilename: {st.get('tmpfilename')}") + log.info(f"[TRACE] had_download flag: {self.had_download}") + if st.get('filename'): + exists = os.path.exists(st['filename']) + log.info(f"[TRACE] File exists at reported location? {exists}") + if exists: + log.info(f"[TRACE] File size: {os.path.getsize(st['filename'])} bytes") + elif status_type == 'error': + log.error(f"[TRACE] put_status ERROR - msg: {st.get('msg')}") + self.status_queue.put({k: v for k, v in st.items() if k in ( 'tmpfilename', 'filename', @@ -89,12 +112,87 @@ class Download: )}) def put_status_postprocessor(d): + log.info(f"[TRACE] ===== POSTPROCESSOR CALLED =====") + log.info(f"[TRACE] Postprocessor: {d.get('postprocessor')}, Status: {d.get('status')}") + if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished': + log.info(f"[TRACE] MoveFiles postprocessor triggered") + log.info(f"[TRACE] had_download flag in postprocessor: {self.had_download}") + log.info(f"[TRACE] info_dict keys: {list(d['info_dict'].keys())}") + log.info(f"[TRACE] info_dict filepath: {d['info_dict'].get('filepath')}") + log.info(f"[TRACE] info_dict __finaldir: {d['info_dict'].get('__finaldir')}") + if '__finaldir' in d['info_dict']: filename = os.path.join(d['info_dict']['__finaldir'], os.path.basename(d['info_dict']['filepath'])) else: filename = d['info_dict']['filepath'] - self.status_queue.put({'status': 'finished', 'filename': filename}) + + log.info(f"[TRACE] Resolved filename: {filename}") + log.info(f"[TRACE] File exists? {os.path.exists(filename)}") + + # List files in directory + dir_name = os.path.dirname(filename) + if os.path.isdir(dir_name): + all_files = os.listdir(dir_name) + log.info(f"[TRACE] Files in {dir_name}: {all_files}") + + # Check if file exists at expected location + if os.path.exists(filename): + log.info(f"[TRACE] File FOUND at expected location") + + # If yt-dlp didn't actually download (skipped), just report the existing file + if not self.had_download: + log.info(f"[TRACE] No actual download occurred - yt-dlp reused existing file") + log.info(f"[TRACE] Sending status with existing filename: {filename}") + self.status_queue.put({'status': 'finished', 'filename': filename}) + else: + # Actual download happened - check for conflicts + log.info(f"[TRACE] Actual download occurred - checking for conflicts") + base_name = os.path.basename(filename) + name, ext = os.path.splitext(base_name) + + # Look for other files with same base name (excluding current file) + other_files = [] + if os.path.isdir(dir_name): + for existing_file in os.listdir(dir_name): + if existing_file == base_name: + log.debug(f"[TRACE] Skipping current file: {existing_file}") + continue # Skip the current file + existing_name, existing_ext = os.path.splitext(existing_file) + # Check for exact name match + if existing_ext == ext and existing_name == name: + log.info(f"[TRACE] Found matching file: {existing_file}") + other_files.append(existing_file) + + log.info(f"[TRACE] Found {len(other_files)} other files with same base name: {other_files}") + + # If other files exist with same name, we have a duplicate - rename the NEW file + if len(other_files) > 0: + log.info(f"[TRACE] CONFLICT DETECTED! Other files: {other_files}") + unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) + new_filename = f"{name}_{unique_id}{ext}" + new_filepath = os.path.join(dir_name, new_filename) + + log.info(f"[TRACE] Attempting rename: {filename} -> {new_filepath}") + try: + os.rename(filename, new_filepath) + log.warning(f"Filename conflict detected. Renamed: {base_name} → {new_filename}") + log.info(f"[TRACE] Rename successful") + filename = new_filepath + except Exception as e: + log.error(f"[TRACE] Rename FAILED: {e}") + log.error(f"Failed to rename file due to conflict: {e}") + else: + log.info(f"[TRACE] No conflict - this is the only file with this name") + + log.info(f"[TRACE] Sending status with filename: {filename}") + self.status_queue.put({'status': 'finished', 'filename': filename}) + else: + log.info(f"[TRACE] File NOT FOUND at expected location") + base_name = os.path.basename(filename) + self.status_queue.put({'status': 'error', 'msg': f'File not found: {base_name}'}) + else: + log.debug(f"[TRACE] Other postprocessor: {d.get('postprocessor')}") ret = yt_dlp.YoutubeDL(params={ 'quiet': True, @@ -128,6 +226,22 @@ class Download: asyncio.create_task(self.update_status()) return await self.loop.run_in_executor(None, self.proc.join) + def _resolve_filename_conflict(self, filepath): + """ + Resolve filename conflicts by appending a short unique ID. + Returns the final non-conflicting filepath. + """ + dir_name = os.path.dirname(filepath) + base_name = os.path.basename(filepath) + name, ext = os.path.splitext(base_name) + + # Generate a short unique ID (5 alphanumeric characters) + unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) + new_filename = f"{name}_{unique_id}{ext}" + new_filepath = os.path.join(dir_name, new_filename) + + return new_filepath + def cancel(self): log.info(f"Cancelling download: {self.info.title}") if self.running(): @@ -250,6 +364,11 @@ class DownloadQueue: elif self.config.DOWNLOAD_MODE == 'limited': self.semaphore = asyncio.Semaphore(int(self.config.MAX_CONCURRENT_DOWNLOADS)) + # PreCheck queue for sequential conflict detection (no locks needed) + self.precheck_queue = asyncio.Queue() + self.reserved_filenames = set() # Track filenames being processed + self.precheck_in_progress = {} # Track URL -> DownloadInfo for items in precheck queue + self.done.load() async def __import_queue(self): @@ -262,8 +381,153 @@ class DownloadQueue: async def initialize(self): log.info("Initializing DownloadQueue") + # Start the precheck worker for sequential conflict detection + asyncio.create_task(self.__precheck_worker()) asyncio.create_task(self.__import_queue()) asyncio.create_task(self.__import_pending()) + + async def __precheck_worker(self): + """Background worker that processes precheck queue sequentially. + Sequential processing naturally prevents race conditions without locks.""" + log.info("[PreCheck] Worker started") + while True: + try: + # Get next item from queue (blocks if empty) + item = await self.precheck_queue.get() + log.debug(f"[PreCheck] Processing item: {item['dl'].url}") + + # Process the precheck and start download + await self.__process_precheck(item) + + # Mark task as done + self.precheck_queue.task_done() + except Exception as e: + log.error(f"[PreCheck] Worker error: {e}", exc_info=True) + + async def __process_precheck(self, item): + """Process a single download with conflict detection. + Called sequentially by worker - no race conditions possible.""" + dl = item['dl'] + auto_start = item['auto_start'] + dldirectory = item['dldirectory'] + output = item['output'] + output_chapter = item['output_chapter'] + ytdl_options = item['ytdl_options'] + entry = item['entry'] + + log.info(f"[PreCheck] Checking for filename conflicts before download") + log.debug(f"[PreCheck] Original output template: {output}") + + # Try to predict the filename that yt-dlp will generate + if entry and 'title' in entry: + # Check if we have the real title or just a placeholder + title = entry.get('title', '') + video_id = entry.get('id', '') + + # If title looks like a placeholder (contains the ID), we need full extraction + needs_full_extraction = ( + not title or # No title + title == f"twitter video #{video_id}" or # Placeholder pattern + video_id in title # ID is in title (likely placeholder) + ) + + if needs_full_extraction: + log.debug(f"[PreCheck] Title appears to be placeholder: '{title}', doing full info extraction") + try: + # Do a full (non-flat) extraction to get real title + full_entry = await asyncio.get_running_loop().run_in_executor( + None, + lambda: yt_dlp.YoutubeDL(params={ + 'quiet': True, + 'no_color': True, + 'extract_flat': False, # Full extraction + 'skip_download': True, # Don't download, just get info + 'paths': {"home": dldirectory, "temp": self.config.TEMP_DIR}, + **ytdl_options, + }).extract_info(dl.url, download=False) + ) + if full_entry and 'title' in full_entry: + title = full_entry['title'] + log.debug(f"[PreCheck] Got real title from full extraction: '{title}'") + except Exception as e: + log.warning(f"[PreCheck] Failed to get full info: {e}, using placeholder title") + + predicted_filename = output + # Replace title + if '%(title)s' in predicted_filename: + predicted_filename = predicted_filename.replace('%(title)s', title) + + # Replace id + if '%(id)s' in predicted_filename and video_id: + predicted_filename = predicted_filename.replace('%(id)s', video_id) + + # Handle ext specially - default to format's extension if not in entry + if '%(ext)s' in predicted_filename: + ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4') + predicted_filename = predicted_filename.replace('%(ext)s', ext) + + predicted_filepath = os.path.join(dldirectory, predicted_filename) + log.info(f"[PreCheck] Predicted filepath: {predicted_filepath}") + + # Check if file already exists OR is reserved by another download in queue + # Sequential processing means we check one at a time - no race condition + if os.path.exists(predicted_filepath) or predicted_filepath in self.reserved_filenames: + if predicted_filepath in self.reserved_filenames: + log.warning(f"[PreCheck] Filename is reserved by pending download! Will append unique ID") + else: + log.warning(f"[PreCheck] File already exists! Will append unique ID to avoid conflict") + + # Generate unique ID + unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) + + # Modify output template to include unique ID before extension + # Change "%(title)s.%(ext)s" to "%(title)s_XXXXX.%(ext)s" + if '.%(ext)s' in output: + output = output.replace('.%(ext)s', f'_{unique_id}.%(ext)s') + else: + # Fallback: append to end + output = f"{output}_{unique_id}" + + # Re-predict the new filename + predicted_filename = output + if '%(title)s' in predicted_filename: + predicted_filename = predicted_filename.replace('%(title)s', title) + if '%(id)s' in predicted_filename and video_id: + predicted_filename = predicted_filename.replace('%(id)s', video_id) + if '%(ext)s' in predicted_filename: + ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4') + predicted_filename = predicted_filename.replace('%(ext)s', ext) + predicted_filepath = os.path.join(dldirectory, predicted_filename) + + log.info(f"[PreCheck] Modified output template: {output}") + log.info(f"[PreCheck] New predicted filepath: {predicted_filepath}") + else: + log.info(f"[PreCheck] No conflict detected, using original template") + + # Reserve this filename to prevent concurrent downloads from using it + self.reserved_filenames.add(predicted_filepath) + log.debug(f"[PreCheck] Reserved filename: {predicted_filepath}") + else: + predicted_filepath = None + log.debug(f"[PreCheck] No entry data available, skipping pre-check") + + log.debug(f"final resolved output template: {output}") + download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl) + + # Store the reserved filepath for cleanup + download.reserved_filepath = predicted_filepath + + # Remove from in-progress set before adding to queue + # This allows checking queue.exists() to work properly + if dl.url in self.precheck_in_progress: + del self.precheck_in_progress[dl.url] + log.debug(f"[PreCheck] Removed from in-progress tracking: {dl.url}") + + if auto_start is True: + self.queue.put(download) + asyncio.create_task(self.__start_download(download)) + else: + self.pending.put(download) async def __start_download(self, download): if download.canceled: @@ -296,6 +560,12 @@ class DownloadQueue: self._post_download_cleanup(download) def _post_download_cleanup(self, download): + # Release filename reservation if it exists + if hasattr(download, 'reserved_filepath') and download.reserved_filepath: + if download.reserved_filepath in self.reserved_filenames: + self.reserved_filenames.discard(download.reserved_filepath) + log.debug(f"[PreCheck] Released reservation for: {download.reserved_filepath}") + if download.info.status != 'finished': if download.tmpfilename and os.path.isfile(download.tmpfilename): try: @@ -342,18 +612,32 @@ class DownloadQueue: return dldirectory, None async def __add_download(self, dl, auto_start): + """Fast path: validate and queue for precheck processing. + Returns immediately without blocking on slow operations.""" + # Check if this exact URL is already being processed, in queue, or already downloaded + # This prevents duplicate downloads when same URL is submitted multiple times + if (dl.url in self.precheck_in_progress or + self.queue.exists(dl.url) or + self.pending.exists(dl.url) or + self.done.exists(dl.url)): + log.info(f"[PreCheck] URL already queued/processing/downloaded, skipping: {dl.url}") + return {'status': 'ok', 'msg': 'Download already exists'} + dldirectory, error_message = self.__calc_download_path(dl.quality, dl.format, dl.folder) if error_message is not None: return error_message + output = self.config.OUTPUT_TEMPLATE if len(dl.custom_name_prefix) == 0 else f'{dl.custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}' output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER entry = getattr(dl, 'entry', None) + if entry is not None and 'playlist' in entry and entry['playlist'] is not None: if len(self.config.OUTPUT_TEMPLATE_PLAYLIST): output = self.config.OUTPUT_TEMPLATE_PLAYLIST for property, value in entry.items(): if property.startswith("playlist"): output = output.replace(f"%({property})s", str(value)) + ytdl_options = dict(self.config.YTDL_OPTIONS) playlist_item_limit = getattr(dl, 'playlist_item_limit', 0) if playlist_item_limit > 0: @@ -404,12 +688,23 @@ class DownloadQueue: else: log.debug(f"[Cookie] Cookies directory does not exist") - download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl) - if auto_start is True: - self.queue.put(download) - asyncio.create_task(self.__start_download(download)) - else: - self.pending.put(download) + # Mark URL as being processed to prevent duplicates + # Store the DownloadInfo so we can display it in UI + self.precheck_in_progress[dl.url] = dl + + # Queue for sequential precheck processing (fast, non-blocking) + await self.precheck_queue.put({ + 'dl': dl, + 'auto_start': auto_start, + 'dldirectory': dldirectory, + 'output': output, + 'output_chapter': output_chapter, + 'ytdl_options': ytdl_options, + 'entry': entry, + }) + log.debug(f"[PreCheck] Queued for processing: {dl.url}") + + # Notify immediately (fast response to user) await self.notifier.added(dl) async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already): @@ -575,6 +870,11 @@ class DownloadQueue: else: v.info.file_exists = False - return (list((k, v.info) for k, v in self.queue.items()) + + # Create list from items in precheck queue + # These items have 'preparing' status to indicate they're being analyzed + precheck_list = [(dl.url, dl) for dl in self.precheck_in_progress.values()] + + return (precheck_list + + list((k, v.info) for k, v in self.queue.items()) + list((k, v.info) for k, v in self.pending.items()), list((k, v.info) for k, v in self.done.items()))