import os import yt_dlp from collections import OrderedDict import shelve import time import asyncio import multiprocessing import logging import re import random import string from urllib.parse import urlparse import yt_dlp.networking.impersonate from dl_formats import get_format, get_opts, AUDIO_FORMATS from datetime import datetime log = logging.getLogger('ytdl') class DownloadQueueNotifier: async def added(self, dl): raise NotImplementedError async def updated(self, dl): raise NotImplementedError async def completed(self, dl): raise NotImplementedError async def canceled(self, id): raise NotImplementedError async def cleared(self, id): raise NotImplementedError async def event(self, event): raise NotImplementedError class DownloadInfo: def __init__(self, id, title, url, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit): self.id = id if len(custom_name_prefix) == 0 else f'{custom_name_prefix}.{id}' self.title = title if len(custom_name_prefix) == 0 else f'{custom_name_prefix}.{title}' self.url = url self.quality = quality self.format = format self.folder = folder self.custom_name_prefix = custom_name_prefix self.msg = self.percent = self.speed = self.eta = None self.status = "pending" self.size = None self.timestamp = time.time_ns() self.error = error self.entry = entry self.playlist_item_limit = playlist_item_limit # Extract website domain from URL parsed_url = urlparse(url) self.website = parsed_url.netloc self.file_exists = None class Download: manager = None def __init__(self, download_dir, temp_dir, output_template, output_template_chapter, quality, format, ytdl_opts, info): self.download_dir = download_dir self.temp_dir = temp_dir self.output_template = output_template self.output_template_chapter = output_template_chapter self.format = get_format(format, quality) self.ytdl_opts = get_opts(format, quality, ytdl_opts) if "impersonate" in self.ytdl_opts: self.ytdl_opts["impersonate"] = yt_dlp.networking.impersonate.ImpersonateTarget.from_str(self.ytdl_opts["impersonate"]) self.info = info self.canceled = False self.tmpfilename = None self.status_queue = None self.proc = None self.loop = None self.notifier = None self.had_download = False # Track if actual download occurred def _download(self): log.info(f"Starting download for: {self.info.title} ({self.info.url})") log.info(f"[TRACE] Download config: download_dir={self.download_dir}, temp_dir={self.temp_dir}") log.info(f"[TRACE] Output template: {self.output_template}") try: def put_status(st): # Log every status update to trace the flow status_type = st.get('status', 'unknown') if status_type == 'downloading': # Mark that we're actually downloading (not skipping) self.had_download = True if 'tmpfilename' in st: log.debug(f"[TRACE] Downloading - tmpfile: {st.get('tmpfilename')}") elif status_type == 'finished': log.info(f"[TRACE] put_status FINISHED - filename: {st.get('filename')}, tmpfilename: {st.get('tmpfilename')}") log.info(f"[TRACE] had_download flag: {self.had_download}") if st.get('filename'): exists = os.path.exists(st['filename']) log.info(f"[TRACE] File exists at reported location? {exists}") if exists: log.info(f"[TRACE] File size: {os.path.getsize(st['filename'])} bytes") elif status_type == 'error': log.error(f"[TRACE] put_status ERROR - msg: {st.get('msg')}") self.status_queue.put({k: v for k, v in st.items() if k in ( 'tmpfilename', 'filename', 'status', 'msg', 'total_bytes', 'total_bytes_estimate', 'downloaded_bytes', 'speed', 'eta', )}) def put_status_postprocessor(d): log.info(f"[TRACE] ===== POSTPROCESSOR CALLED =====") log.info(f"[TRACE] Postprocessor: {d.get('postprocessor')}, Status: {d.get('status')}") if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished': log.info(f"[TRACE] MoveFiles postprocessor triggered") log.info(f"[TRACE] had_download flag in postprocessor: {self.had_download}") log.info(f"[TRACE] info_dict keys: {list(d['info_dict'].keys())}") log.info(f"[TRACE] info_dict filepath: {d['info_dict'].get('filepath')}") log.info(f"[TRACE] info_dict __finaldir: {d['info_dict'].get('__finaldir')}") if '__finaldir' in d['info_dict']: filename = os.path.join(d['info_dict']['__finaldir'], os.path.basename(d['info_dict']['filepath'])) else: filename = d['info_dict']['filepath'] log.info(f"[TRACE] Resolved filename: {filename}") log.info(f"[TRACE] File exists? {os.path.exists(filename)}") # List files in directory dir_name = os.path.dirname(filename) if os.path.isdir(dir_name): all_files = os.listdir(dir_name) log.info(f"[TRACE] Files in {dir_name}: {all_files}") # Check if file exists at expected location if os.path.exists(filename): log.info(f"[TRACE] File FOUND at expected location") # If yt-dlp didn't actually download (skipped), just report the existing file if not self.had_download: log.info(f"[TRACE] No actual download occurred - yt-dlp reused existing file") log.info(f"[TRACE] Sending status with existing filename: {filename}") self.status_queue.put({'status': 'finished', 'filename': filename}) else: # Actual download happened - check for conflicts log.info(f"[TRACE] Actual download occurred - checking for conflicts") base_name = os.path.basename(filename) name, ext = os.path.splitext(base_name) # Look for other files with same base name (excluding current file) other_files = [] if os.path.isdir(dir_name): for existing_file in os.listdir(dir_name): if existing_file == base_name: log.debug(f"[TRACE] Skipping current file: {existing_file}") continue # Skip the current file existing_name, existing_ext = os.path.splitext(existing_file) # Check for exact name match if existing_ext == ext and existing_name == name: log.info(f"[TRACE] Found matching file: {existing_file}") other_files.append(existing_file) log.info(f"[TRACE] Found {len(other_files)} other files with same base name: {other_files}") # If other files exist with same name, we have a duplicate - rename the NEW file if len(other_files) > 0: log.info(f"[TRACE] CONFLICT DETECTED! Other files: {other_files}") unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) new_filename = f"{name}_{unique_id}{ext}" new_filepath = os.path.join(dir_name, new_filename) log.info(f"[TRACE] Attempting rename: {filename} -> {new_filepath}") try: os.rename(filename, new_filepath) log.warning(f"Filename conflict detected. Renamed: {base_name} → {new_filename}") log.info(f"[TRACE] Rename successful") filename = new_filepath except Exception as e: log.error(f"[TRACE] Rename FAILED: {e}") log.error(f"Failed to rename file due to conflict: {e}") else: log.info(f"[TRACE] No conflict - this is the only file with this name") log.info(f"[TRACE] Sending status with filename: {filename}") self.status_queue.put({'status': 'finished', 'filename': filename}) else: log.info(f"[TRACE] File NOT FOUND at expected location") base_name = os.path.basename(filename) self.status_queue.put({'status': 'error', 'msg': f'File not found: {base_name}'}) else: log.debug(f"[TRACE] Other postprocessor: {d.get('postprocessor')}") ret = yt_dlp.YoutubeDL(params={ 'quiet': True, 'no_color': True, 'paths': {"home": self.download_dir, "temp": self.temp_dir}, 'outtmpl': { "default": self.output_template, "chapter": self.output_template_chapter }, 'format': self.format, 'socket_timeout': 30, 'ignore_no_formats_error': True, 'progress_hooks': [put_status], 'postprocessor_hooks': [put_status_postprocessor], **self.ytdl_opts, }).download([self.info.url]) self.status_queue.put({'status': 'finished' if ret == 0 else 'error'}) log.info(f"Finished download for: {self.info.title}") except yt_dlp.utils.YoutubeDLError as exc: log.error(f"Download error for {self.info.title}: {str(exc)}") self.status_queue.put({'status': 'error', 'msg': str(exc)}) async def start(self, notifier): log.info(f"Preparing download for: {self.info.title}") if Download.manager is None: Download.manager = multiprocessing.Manager() self.status_queue = Download.manager.Queue() self.proc = multiprocessing.Process(target=self._download) self.proc.start() self.loop = asyncio.get_running_loop() self.notifier = notifier self.info.status = 'preparing' await self.notifier.updated(self.info) asyncio.create_task(self.update_status()) return await self.loop.run_in_executor(None, self.proc.join) def _resolve_filename_conflict(self, filepath): """ Resolve filename conflicts by appending a short unique ID. Returns the final non-conflicting filepath. """ dir_name = os.path.dirname(filepath) base_name = os.path.basename(filepath) name, ext = os.path.splitext(base_name) # Generate a short unique ID (5 alphanumeric characters) unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) new_filename = f"{name}_{unique_id}{ext}" new_filepath = os.path.join(dir_name, new_filename) return new_filepath def cancel(self): log.info(f"Cancelling download: {self.info.title}") if self.running(): try: self.proc.kill() except Exception as e: log.error(f"Error killing process for {self.info.title}: {e}") self.canceled = True if self.status_queue is not None: self.status_queue.put(None) def close(self): log.info(f"Closing download process for: {self.info.title}") if self.started(): self.proc.close() if self.status_queue is not None: self.status_queue.put(None) def running(self): try: return self.proc is not None and self.proc.is_alive() except ValueError: return False def started(self): return self.proc is not None async def update_status(self): while True: status = await self.loop.run_in_executor(None, self.status_queue.get) if status is None: log.info(f"Status update finished for: {self.info.title}") return if self.canceled: log.info(f"Download {self.info.title} is canceled; stopping status updates.") return self.tmpfilename = status.get('tmpfilename') if 'filename' in status: fileName = status.get('filename') self.info.filename = os.path.relpath(fileName, self.download_dir) self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None if self.info.format == 'thumbnail': self.info.filename = re.sub(r'\.webm$', '.jpg', self.info.filename) self.info.status = status['status'] self.info.msg = status.get('msg') if 'downloaded_bytes' in status: total = status.get('total_bytes') or status.get('total_bytes_estimate') if total: self.info.percent = status['downloaded_bytes'] / total * 100 self.info.speed = status.get('speed') self.info.eta = status.get('eta') log.info(f"Updating status for {self.info.title}: {status}") await self.notifier.updated(self.info) class PersistentQueue: def __init__(self, path): pdir = os.path.dirname(path) if not os.path.isdir(pdir): os.mkdir(pdir) with shelve.open(path, 'c'): pass self.path = path self.dict = OrderedDict() def load(self): for k, v in self.saved_items(): # Ensure website field is populated for older downloads if not hasattr(v, 'website') or v.website is None: parsed_url = urlparse(v.url) v.website = parsed_url.netloc # Ensure file_exists field exists if not hasattr(v, 'file_exists'): v.file_exists = None self.dict[k] = Download(None, None, None, None, None, None, {}, v) def exists(self, key): return key in self.dict def get(self, key): return self.dict[key] def items(self): return self.dict.items() def saved_items(self): with shelve.open(self.path, 'r') as shelf: return sorted(shelf.items(), key=lambda item: item[1].timestamp) def put(self, value): key = value.info.url self.dict[key] = value with shelve.open(self.path, 'w') as shelf: shelf[key] = value.info def delete(self, key): if key in self.dict: del self.dict[key] with shelve.open(self.path, 'w') as shelf: shelf.pop(key, None) def next(self): k, v = next(iter(self.dict.items())) return k, v def empty(self): return not bool(self.dict) class DownloadQueue: def __init__(self, config, notifier): self.config = config self.notifier = notifier self.queue = PersistentQueue(self.config.STATE_DIR + '/queue') self.done = PersistentQueue(self.config.STATE_DIR + '/completed') self.pending = PersistentQueue(self.config.STATE_DIR + '/pending') self.active_downloads = set() self.semaphore = None # For sequential mode, use an asyncio lock to ensure one-at-a-time execution. if self.config.DOWNLOAD_MODE == 'sequential': self.seq_lock = asyncio.Lock() elif self.config.DOWNLOAD_MODE == 'limited': self.semaphore = asyncio.Semaphore(int(self.config.MAX_CONCURRENT_DOWNLOADS)) # PreCheck queue for sequential conflict detection (no locks needed) self.precheck_queue = asyncio.Queue() self.reserved_filenames = set() # Track filenames being processed self.precheck_in_progress = {} # Track URL -> DownloadInfo for items in precheck queue # Event notifications (keep last 5 in memory) self.events = [] # List of {type, message, timestamp, url} self.max_events = 5 self.done.load() async def __import_queue(self): for k, v in self.queue.saved_items(): await self.__add_download(v, True) async def __import_pending(self): for k, v in self.pending.saved_items(): await self.__add_download(v, False) async def initialize(self): log.info("Initializing DownloadQueue") # Start the precheck worker for sequential conflict detection asyncio.create_task(self.__precheck_worker()) asyncio.create_task(self.__import_queue()) asyncio.create_task(self.__import_pending()) async def __precheck_worker(self): """Background worker that processes precheck queue sequentially. Sequential processing naturally prevents race conditions without locks.""" log.info("[PreCheck] Worker started") while True: try: # Get next item from queue (blocks if empty) item = await self.precheck_queue.get() log.debug(f"[PreCheck] Processing item: {item['dl'].url}") # Process the precheck and start download await self.__process_precheck(item) # Mark task as done self.precheck_queue.task_done() except Exception as e: log.error(f"[PreCheck] Worker error: {e}", exc_info=True) async def __process_precheck(self, item): """Process a single download with conflict detection. Called sequentially by worker - no race conditions possible.""" dl = item['dl'] auto_start = item['auto_start'] dldirectory = item['dldirectory'] output = item['output'] output_chapter = item['output_chapter'] ytdl_options = item['ytdl_options'] entry = item['entry'] log.info(f"[PreCheck] Checking for filename conflicts before download") log.debug(f"[PreCheck] Original output template: {output}") # Try to predict the filename that yt-dlp will generate if entry and 'title' in entry: # Check if we have the real title or just a placeholder title = entry.get('title', '') video_id = entry.get('id', '') # If title looks like a placeholder (contains the ID), we need full extraction needs_full_extraction = ( not title or # No title title == f"twitter video #{video_id}" or # Placeholder pattern video_id in title # ID is in title (likely placeholder) ) if needs_full_extraction: log.debug(f"[PreCheck] Title appears to be placeholder: '{title}', doing full info extraction") try: # Do a full (non-flat) extraction to get real title full_entry = await asyncio.get_running_loop().run_in_executor( None, lambda: yt_dlp.YoutubeDL(params={ 'quiet': True, 'no_color': True, 'extract_flat': False, # Full extraction 'skip_download': True, # Don't download, just get info 'paths': {"home": dldirectory, "temp": self.config.TEMP_DIR}, **ytdl_options, }).extract_info(dl.url, download=False) ) if full_entry and 'title' in full_entry: title = full_entry['title'] log.debug(f"[PreCheck] Got real title from full extraction: '{title}'") except Exception as e: log.warning(f"[PreCheck] Failed to get full info: {e}, using placeholder title") predicted_filename = output # Replace title if '%(title)s' in predicted_filename: predicted_filename = predicted_filename.replace('%(title)s', title) # Replace id if '%(id)s' in predicted_filename and video_id: predicted_filename = predicted_filename.replace('%(id)s', video_id) # Handle ext specially - default to format's extension if not in entry if '%(ext)s' in predicted_filename: ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4') predicted_filename = predicted_filename.replace('%(ext)s', ext) predicted_filepath = os.path.join(dldirectory, predicted_filename) log.info(f"[PreCheck] Predicted filepath: {predicted_filepath}") # Check if file already exists OR is reserved by another download in queue # Sequential processing means we check one at a time - no race condition if os.path.exists(predicted_filepath) or predicted_filepath in self.reserved_filenames: if predicted_filepath in self.reserved_filenames: log.warning(f"[PreCheck] Filename is reserved by pending download! Will append unique ID") else: log.warning(f"[PreCheck] File already exists! Will append unique ID to avoid conflict") # Generate unique ID unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) # Modify output template to include unique ID before extension # Change "%(title)s.%(ext)s" to "%(title)s_XXXXX.%(ext)s" if '.%(ext)s' in output: output = output.replace('.%(ext)s', f'_{unique_id}.%(ext)s') else: # Fallback: append to end output = f"{output}_{unique_id}" # Re-predict the new filename predicted_filename = output if '%(title)s' in predicted_filename: predicted_filename = predicted_filename.replace('%(title)s', title) if '%(id)s' in predicted_filename and video_id: predicted_filename = predicted_filename.replace('%(id)s', video_id) if '%(ext)s' in predicted_filename: ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4') predicted_filename = predicted_filename.replace('%(ext)s', ext) predicted_filepath = os.path.join(dldirectory, predicted_filename) log.info(f"[PreCheck] Modified output template: {output}") log.info(f"[PreCheck] New predicted filepath: {predicted_filepath}") else: log.info(f"[PreCheck] No conflict detected, using original template") # Reserve this filename to prevent concurrent downloads from using it self.reserved_filenames.add(predicted_filepath) log.debug(f"[PreCheck] Reserved filename: {predicted_filepath}") else: predicted_filepath = None log.debug(f"[PreCheck] No entry data available, skipping pre-check") log.debug(f"final resolved output template: {output}") download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl) # Store the reserved filepath for cleanup download.reserved_filepath = predicted_filepath # Remove from in-progress set before adding to queue # This allows checking queue.exists() to work properly if dl.url in self.precheck_in_progress: del self.precheck_in_progress[dl.url] log.debug(f"[PreCheck] Removed from in-progress tracking: {dl.url}") if auto_start is True: self.queue.put(download) asyncio.create_task(self.__start_download(download)) else: self.pending.put(download) async def __start_download(self, download): if download.canceled: log.info(f"Download {download.info.title} was canceled, skipping start.") return if self.config.DOWNLOAD_MODE == 'sequential': async with self.seq_lock: log.info("Starting sequential download.") await download.start(self.notifier) self._post_download_cleanup(download) elif self.config.DOWNLOAD_MODE == 'limited' and self.semaphore is not None: await self.__limited_concurrent_download(download) else: await self.__concurrent_download(download) async def __concurrent_download(self, download): log.info("Starting concurrent download without limits.") asyncio.create_task(self._run_download(download)) async def __limited_concurrent_download(self, download): log.info("Starting limited concurrent download.") async with self.semaphore: await self._run_download(download) async def _run_download(self, download): if download.canceled: log.info(f"Download {download.info.title} is canceled; skipping start.") return await download.start(self.notifier) self._post_download_cleanup(download) def _post_download_cleanup(self, download): # Release filename reservation if it exists if hasattr(download, 'reserved_filepath') and download.reserved_filepath: if download.reserved_filepath in self.reserved_filenames: self.reserved_filenames.discard(download.reserved_filepath) log.debug(f"[PreCheck] Released reservation for: {download.reserved_filepath}") if download.info.status != 'finished': if download.tmpfilename and os.path.isfile(download.tmpfilename): try: os.remove(download.tmpfilename) except: pass download.info.status = 'error' download.close() if self.queue.exists(download.info.url): self.queue.delete(download.info.url) if download.canceled: asyncio.create_task(self.notifier.canceled(download.info.url)) else: self.done.put(download) asyncio.create_task(self.notifier.completed(download.info)) def __extract_info(self, url, playlist_strict_mode): return yt_dlp.YoutubeDL(params={ 'quiet': True, 'no_color': True, 'extract_flat': True, 'ignore_no_formats_error': True, 'noplaylist': playlist_strict_mode, 'paths': {"home": self.config.DOWNLOAD_DIR, "temp": self.config.TEMP_DIR}, **self.config.YTDL_OPTIONS, **({'impersonate': yt_dlp.networking.impersonate.ImpersonateTarget.from_str(self.config.YTDL_OPTIONS['impersonate'])} if 'impersonate' in self.config.YTDL_OPTIONS else {}), }).extract_info(url, download=False) def __calc_download_path(self, quality, format, folder): base_directory = self.config.DOWNLOAD_DIR if (quality != 'audio' and format not in AUDIO_FORMATS) else self.config.AUDIO_DOWNLOAD_DIR if folder: if not self.config.CUSTOM_DIRS: return None, {'status': 'error', 'msg': f'A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.'} dldirectory = os.path.realpath(os.path.join(base_directory, folder)) real_base_directory = os.path.realpath(base_directory) if not dldirectory.startswith(real_base_directory): return None, {'status': 'error', 'msg': f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"'} if not os.path.isdir(dldirectory): if not self.config.CREATE_CUSTOM_DIRS: return None, {'status': 'error', 'msg': f'Folder "{folder}" for download does not exist inside base directory "{real_base_directory}", and CREATE_CUSTOM_DIRS is not true in the configuration.'} os.makedirs(dldirectory, exist_ok=True) else: dldirectory = base_directory return dldirectory, None async def __add_download(self, dl, auto_start): """Fast path: validate and queue for precheck processing. Returns immediately without blocking on slow operations.""" # Check if this exact URL is already being processed, in queue, or already downloaded # This prevents duplicate downloads when same URL is submitted multiple times if (dl.url in self.precheck_in_progress or self.queue.exists(dl.url) or self.pending.exists(dl.url) or self.done.exists(dl.url)): log.info(f"[PreCheck] URL already queued/processing/downloaded, skipping: {dl.url}") # Add event notification self._add_event('duplicate_skipped', 'URL already in queue or downloaded', dl.url) return {'status': 'ok', 'msg': 'Download already exists'} dldirectory, error_message = self.__calc_download_path(dl.quality, dl.format, dl.folder) if error_message is not None: return error_message output = self.config.OUTPUT_TEMPLATE if len(dl.custom_name_prefix) == 0 else f'{dl.custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}' output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER entry = getattr(dl, 'entry', None) if entry is not None and 'playlist' in entry and entry['playlist'] is not None: if len(self.config.OUTPUT_TEMPLATE_PLAYLIST): output = self.config.OUTPUT_TEMPLATE_PLAYLIST for property, value in entry.items(): if property.startswith("playlist"): output = output.replace(f"%({property})s", str(value)) ytdl_options = dict(self.config.YTDL_OPTIONS) playlist_item_limit = getattr(dl, 'playlist_item_limit', 0) if playlist_item_limit > 0: log.info(f'playlist limit is set. Processing only first {playlist_item_limit} entries') ytdl_options['playlistend'] = playlist_item_limit # Check if cookie file exists for this domain parsed_url = urlparse(dl.url) domain = parsed_url.netloc log.info(f"[Cookie] Checking for cookie file for domain: {domain}") cookies_dir = os.path.join(self.config.STATE_DIR, 'cookies') log.debug(f"[Cookie] Cookies directory: {cookies_dir}") # Try domain-specific cookie file safe_domain = domain.replace(':', '_').replace('/', '_') cookie_file = os.path.join(cookies_dir, f'{safe_domain}.txt') log.debug(f"[Cookie] Looking for cookie file at: {cookie_file}") if os.path.exists(cookie_file): log.info(f"[Cookie] Found cookie file: {cookie_file}") # Verify file is readable and has content try: with open(cookie_file, 'r') as f: lines = f.readlines() cookie_lines = [l for l in lines if l.strip() and not l.startswith('#')] log.info(f"[Cookie] Cookie file contains {len(cookie_lines)} cookie entries") if len(cookie_lines) == 0: log.warning(f"[Cookie] Cookie file exists but contains no cookies!") else: log.debug(f"[Cookie] First cookie entry: {cookie_lines[0][:50]}...") except Exception as e: log.error(f"[Cookie] Error reading cookie file: {e}", exc_info=True) ytdl_options['cookiefile'] = cookie_file log.info(f"[Cookie] Configured yt-dlp to use cookiefile: {cookie_file}") else: log.info(f"[Cookie] No cookie file found for domain {domain}") log.debug(f"[Cookie] Checked path: {cookie_file}") # List available cookie files for debugging if os.path.exists(cookies_dir): available_cookies = os.listdir(cookies_dir) if available_cookies: log.debug(f"[Cookie] Available cookie files: {available_cookies}") else: log.debug(f"[Cookie] Cookies directory is empty") else: log.debug(f"[Cookie] Cookies directory does not exist") # Mark URL as being processed to prevent duplicates # Store the DownloadInfo so we can display it in UI self.precheck_in_progress[dl.url] = dl # Queue for sequential precheck processing (fast, non-blocking) await self.precheck_queue.put({ 'dl': dl, 'auto_start': auto_start, 'dldirectory': dldirectory, 'output': output, 'output_chapter': output_chapter, 'ytdl_options': ytdl_options, 'entry': entry, }) log.debug(f"[PreCheck] Queued for processing: {dl.url}") # Notify immediately (fast response to user) await self.notifier.added(dl) async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already): if not entry: return {'status': 'error', 'msg': "Invalid/empty data was given."} error = None if "live_status" in entry and "release_timestamp" in entry and entry.get("live_status") == "is_upcoming": dt_ts = datetime.fromtimestamp(entry.get("release_timestamp")).strftime('%Y-%m-%d %H:%M:%S %z') error = f"Live stream is scheduled to start at {dt_ts}" else: if "msg" in entry: error = entry["msg"] etype = entry.get('_type') or 'video' if etype.startswith('url'): log.debug('Processing as an url') return await self.add(entry['url'], quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already) elif etype == 'playlist': log.debug('Processing as a playlist') entries = entry['entries'] log.info(f'playlist detected with {len(entries)} entries') playlist_index_digits = len(str(len(entries))) results = [] if playlist_item_limit > 0: log.info(f'Playlist item limit is set. Processing only first {playlist_item_limit} entries') entries = entries[:playlist_item_limit] # Verify playlist entry has 'id' before using it playlist_id = entry.get("id", "unknown_playlist") if "id" not in entry: log.warning(f"Playlist entry missing 'id' field. Using fallback 'unknown_playlist'. Entry keys: {list(entry.keys())}") for index, etr in enumerate(entries, start=1): etr["_type"] = "video" etr["playlist"] = playlist_id etr["playlist_index"] = '{{0:0{0:d}d}}'.format(playlist_index_digits).format(index) for property in ("id", "title", "uploader", "uploader_id"): if property in entry: etr[f"playlist_{property}"] = entry[property] results.append(await self.__add_entry(etr, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already)) if any(res['status'] == 'error' for res in results): return {'status': 'error', 'msg': ', '.join(res['msg'] for res in results if res['status'] == 'error' and 'msg' in res)} return {'status': 'ok'} elif etype == 'video' or (etype.startswith('url') and 'id' in entry and 'title' in entry): log.debug('Processing as a video') # Extract ID from entry, or derive from URL if missing video_id = entry.get('id') if not video_id: # Try to extract ID from URL (e.g., viewkey parameter or URL path) video_url = entry.get('url', '') if 'viewkey=' in video_url: # Extract viewkey parameter (common in PornHub, etc.) match = re.search(r'viewkey=([^&]+)', video_url) if match: video_id = match.group(1) log.info(f"Extracted video ID from viewkey: {video_id}") elif 'webpage_url' in entry: # Use webpage_url as fallback video_id = entry['webpage_url'] else: # Last resort: use the URL itself video_id = video_url if not video_id: log.error(f"Video entry missing 'id' field and could not extract from URL. Entry keys: {list(entry.keys())}") return {'status': 'error', 'msg': "Video entry missing required 'id' field and URL extraction failed"} key = entry.get('webpage_url') or entry['url'] if not self.queue.exists(key): dl = DownloadInfo(video_id, entry.get('title') or video_id, key, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit) await self.__add_download(dl, auto_start) return {'status': 'ok'} return {'status': 'error', 'msg': f'Unsupported resource "{etype}"'} async def add(self, url, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start=True, already=None): log.info(f'adding {url}: {quality=} {format=} {already=} {folder=} {custom_name_prefix=} {playlist_strict_mode=} {playlist_item_limit=} {auto_start=}') already = set() if already is None else already if url in already: log.info('recursion detected, skipping') return {'status': 'ok'} else: already.add(url) try: entry = await asyncio.get_running_loop().run_in_executor(None, self.__extract_info, url, playlist_strict_mode) except yt_dlp.utils.YoutubeDLError as exc: return {'status': 'error', 'msg': str(exc)} return await self.__add_entry(entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already) async def start_pending(self, ids): for id in ids: if not self.pending.exists(id): log.warn(f'requested start for non-existent download {id}') continue dl = self.pending.get(id) self.queue.put(dl) self.pending.delete(id) asyncio.create_task(self.__start_download(dl)) return {'status': 'ok'} async def cancel(self, ids): for id in ids: if self.pending.exists(id): self.pending.delete(id) await self.notifier.canceled(id) continue if not self.queue.exists(id): log.warn(f'requested cancel for non-existent download {id}') continue if self.queue.get(id).started(): self.queue.get(id).cancel() else: self.queue.delete(id) await self.notifier.canceled(id) return {'status': 'ok'} async def clear(self, ids): for id in ids: if not self.done.exists(id): log.warn(f'requested delete for non-existent download {id}') continue if self.config.DELETE_FILE_ON_TRASHCAN: dl = self.done.get(id) try: dldirectory, _ = self.__calc_download_path(dl.info.quality, dl.info.format, dl.info.folder) os.remove(os.path.join(dldirectory, dl.info.filename)) except Exception as e: log.warn(f'deleting file for download {id} failed with error message {e!r}') self.done.delete(id) await self.notifier.cleared(id) return {'status': 'ok'} def get(self): # Ensure website field is populated for all downloads for k, v in self.queue.items(): if not hasattr(v.info, 'website') or v.info.website is None: parsed_url = urlparse(v.info.url) v.info.website = parsed_url.netloc for k, v in self.pending.items(): if not hasattr(v.info, 'website') or v.info.website is None: parsed_url = urlparse(v.info.url) v.info.website = parsed_url.netloc # Update file existence status for done downloads for k, v in self.done.items(): if not hasattr(v.info, 'website') or v.info.website is None: parsed_url = urlparse(v.info.url) v.info.website = parsed_url.netloc # Use getattr with default to safely check for filename attribute filename = getattr(v.info, 'filename', None) if filename: dldirectory, _ = self.__calc_download_path(v.info.quality, v.info.format, v.info.folder) if dldirectory: filepath = os.path.join(dldirectory, filename) v.info.file_exists = os.path.exists(filepath) else: v.info.file_exists = False else: v.info.file_exists = False # Create list from items in precheck queue # These items have 'preparing' status to indicate they're being analyzed precheck_list = [(dl.url, dl) for dl in self.precheck_in_progress.values()] return (precheck_list + list((k, v.info) for k, v in self.queue.items()) + list((k, v.info) for k, v in self.pending.items()), list((k, v.info) for k, v in self.done.items())) def _add_event(self, event_type, message, url=None): """Add an event to the events list (keep only last 5).""" event = { 'type': event_type, 'message': message, 'timestamp': int(time.time()), 'url': url } self.events.append(event) # Keep only last 5 events if len(self.events) > self.max_events: self.events = self.events[-self.max_events:] # Notify frontend via WebSocket asyncio.create_task(self.notifier.event(event)) def get_events(self): """Get all events (last 5).""" return self.events def clear_events(self): """Clear all events.""" self.events = []