metube/app/ytdl.py

import os
import yt_dlp
from collections import OrderedDict
import shelve
import time
import asyncio
import multiprocessing
import logging
import re
import random
import string
from urllib.parse import urlparse

import yt_dlp.networking.impersonate
from dl_formats import get_format, get_opts, AUDIO_FORMATS
from datetime import datetime

log = logging.getLogger('ytdl')

class DownloadQueueNotifier:
    async def added(self, dl):
        raise NotImplementedError

    async def updated(self, dl):
        raise NotImplementedError

    async def completed(self, dl):
        raise NotImplementedError

    async def canceled(self, id):
        raise NotImplementedError

    async def cleared(self, id):
        raise NotImplementedError

    async def event(self, event):
        raise NotImplementedError

class DownloadInfo:
    def __init__(self, id, title, url, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit):
        self.id = id if len(custom_name_prefix) == 0 else f'{custom_name_prefix}.{id}'
        self.title = title if len(custom_name_prefix) == 0 else f'{custom_name_prefix}.{title}'
        self.url = url
        self.quality = quality
        self.format = format
        self.folder = folder
        self.custom_name_prefix = custom_name_prefix
        self.msg = self.percent = self.speed = self.eta = None
        self.status = "pending"
        self.size = None
        self.timestamp = time.time_ns()
        self.error = error
        self.entry = entry
        self.playlist_item_limit = playlist_item_limit
        # Extract website domain from URL
        parsed_url = urlparse(url)
        self.website = parsed_url.netloc
        self.file_exists = None

class Download:
    manager = None

    def __init__(self, download_dir, temp_dir, output_template, output_template_chapter, quality, format, ytdl_opts, info):
        self.download_dir = download_dir
        self.temp_dir = temp_dir
        self.output_template = output_template
        self.output_template_chapter = output_template_chapter
        self.format = get_format(format, quality)
        self.ytdl_opts = get_opts(format, quality, ytdl_opts)
        if "impersonate" in self.ytdl_opts:
            self.ytdl_opts["impersonate"] = yt_dlp.networking.impersonate.ImpersonateTarget.from_str(self.ytdl_opts["impersonate"])
        self.info = info
        self.canceled = False
        self.tmpfilename = None
        self.status_queue = None
        self.proc = None
        self.loop = None
        self.notifier = None
        self.had_download = False  # Track if actual download occurred

    def _download(self):
        log.info(f"Starting download for: {self.info.title} ({self.info.url})")
        log.info(f"[TRACE] Download config: download_dir={self.download_dir}, temp_dir={self.temp_dir}")
        log.info(f"[TRACE] Output template: {self.output_template}")
        try:
            def put_status(st):
                # Log every status update to trace the flow
                status_type = st.get('status', 'unknown')
                if status_type == 'downloading':
                    # Mark that we're actually downloading (not skipping)
                    self.had_download = True
                    if 'tmpfilename' in st:
                        log.debug(f"[TRACE] Downloading - tmpfile: {st.get('tmpfilename')}")
                elif status_type == 'finished':
                    log.info(f"[TRACE] put_status FINISHED - filename: {st.get('filename')}, tmpfilename: {st.get('tmpfilename')}")
                    log.info(f"[TRACE] had_download flag: {self.had_download}")
                    if st.get('filename'):
                        exists = os.path.exists(st['filename'])
                        log.info(f"[TRACE] File exists at reported location? {exists}")
                        if exists:
                            log.info(f"[TRACE] File size: {os.path.getsize(st['filename'])} bytes")
                elif status_type == 'error':
                    log.error(f"[TRACE] put_status ERROR - msg: {st.get('msg')}")

                self.status_queue.put({k: v for k, v in st.items() if k in (
                    'tmpfilename',
                    'filename',
                    'status',
                    'msg',
                    'total_bytes',
                    'total_bytes_estimate',
                    'downloaded_bytes',
                    'speed',
                    'eta',
                )})

            def put_status_postprocessor(d):
                log.info(f"[TRACE] ===== POSTPROCESSOR CALLED =====")
                log.info(f"[TRACE] Postprocessor: {d.get('postprocessor')}, Status: {d.get('status')}")

                if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished':
                    log.info(f"[TRACE] MoveFiles postprocessor triggered")
                    log.info(f"[TRACE] had_download flag in postprocessor: {self.had_download}")
                    log.info(f"[TRACE] info_dict keys: {list(d['info_dict'].keys())}")
                    log.info(f"[TRACE] info_dict filepath: {d['info_dict'].get('filepath')}")
                    log.info(f"[TRACE] info_dict __finaldir: {d['info_dict'].get('__finaldir')}")

                    if '__finaldir' in d['info_dict']:
                        filename = os.path.join(d['info_dict']['__finaldir'], os.path.basename(d['info_dict']['filepath']))
                    else:
                        filename = d['info_dict']['filepath']

                    log.info(f"[TRACE] Resolved filename: {filename}")
                    log.info(f"[TRACE] File exists? {os.path.exists(filename)}")

                    # List files in directory
                    dir_name = os.path.dirname(filename)
                    if os.path.isdir(dir_name):
                        all_files = os.listdir(dir_name)
                        log.info(f"[TRACE] Files in {dir_name}: {all_files}")

                    # Check if file exists at expected location
                    if os.path.exists(filename):
                        log.info(f"[TRACE] File FOUND at expected location")

                        # If yt-dlp didn't actually download (skipped), just report the existing file
                        if not self.had_download:
                            log.info(f"[TRACE] No actual download occurred - yt-dlp reused existing file")
                            log.info(f"[TRACE] Sending status with existing filename: {filename}")
                            self.status_queue.put({'status': 'finished', 'filename': filename})
                        else:
                            # Actual download happened - check for conflicts
                            log.info(f"[TRACE] Actual download occurred - checking for conflicts")
                            base_name = os.path.basename(filename)
                            name, ext = os.path.splitext(base_name)

                            # Look for other files with same base name (excluding current file)
                            other_files = []
                            if os.path.isdir(dir_name):
                                for existing_file in os.listdir(dir_name):
                                    if existing_file == base_name:
                                        log.debug(f"[TRACE] Skipping current file: {existing_file}")
                                        continue  # Skip the current file
                                    existing_name, existing_ext = os.path.splitext(existing_file)
                                    # Check for exact name match
                                    if existing_ext == ext and existing_name == name:
                                        log.info(f"[TRACE] Found matching file: {existing_file}")
                                        other_files.append(existing_file)

                            log.info(f"[TRACE] Found {len(other_files)} other files with same base name: {other_files}")

                            # If other files exist with same name, we have a duplicate - rename the NEW file
                            if len(other_files) > 0:
                                log.info(f"[TRACE] CONFLICT DETECTED! Other files: {other_files}")
                                unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
                                new_filename = f"{name}_{unique_id}{ext}"
                                new_filepath = os.path.join(dir_name, new_filename)

                                log.info(f"[TRACE] Attempting rename: {filename} -> {new_filepath}")
                                try:
                                    os.rename(filename, new_filepath)
                                    log.warning(f"Filename conflict detected. Renamed: {base_name} → {new_filename}")
                                    log.info(f"[TRACE] Rename successful")
                                    filename = new_filepath
                                except Exception as e:
                                    log.error(f"[TRACE] Rename FAILED: {e}")
                                    log.error(f"Failed to rename file due to conflict: {e}")
                            else:
                                log.info(f"[TRACE] No conflict - this is the only file with this name")

                            log.info(f"[TRACE] Sending status with filename: {filename}")
                            self.status_queue.put({'status': 'finished', 'filename': filename})
                    else:
                        log.info(f"[TRACE] File NOT FOUND at expected location")
                        base_name = os.path.basename(filename)
                        self.status_queue.put({'status': 'error', 'msg': f'File not found: {base_name}'})
                else:
                    log.debug(f"[TRACE] Other postprocessor: {d.get('postprocessor')}")

            ret = yt_dlp.YoutubeDL(params={
                'quiet': True,
                'no_color': True,
                'paths': {"home": self.download_dir, "temp": self.temp_dir},
                'outtmpl': { "default": self.output_template, "chapter": self.output_template_chapter },
                'format': self.format,
                'socket_timeout': 30,
                'ignore_no_formats_error': True,
                'progress_hooks': [put_status],
                'postprocessor_hooks': [put_status_postprocessor],
                **self.ytdl_opts,
            }).download([self.info.url])
            self.status_queue.put({'status': 'finished' if ret == 0 else 'error'})
            log.info(f"Finished download for: {self.info.title}")
        except yt_dlp.utils.YoutubeDLError as exc:
            log.error(f"Download error for {self.info.title}: {str(exc)}")
            self.status_queue.put({'status': 'error', 'msg': str(exc)})

    async def start(self, notifier):
        log.info(f"Preparing download for: {self.info.title}")
        if Download.manager is None:
            Download.manager = multiprocessing.Manager()
        self.status_queue = Download.manager.Queue()
        self.proc = multiprocessing.Process(target=self._download)
        self.proc.start()
        self.loop = asyncio.get_running_loop()
        self.notifier = notifier
        self.info.status = 'preparing'
        await self.notifier.updated(self.info)
        asyncio.create_task(self.update_status())
        return await self.loop.run_in_executor(None, self.proc.join)

    def _resolve_filename_conflict(self, filepath):
        """
        Resolve filename conflicts by appending a short unique ID.
        Returns the final non-conflicting filepath.
        """
        dir_name = os.path.dirname(filepath)
        base_name = os.path.basename(filepath)
        name, ext = os.path.splitext(base_name)

        # Generate a short unique ID (5 alphanumeric characters)
        unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
        new_filename = f"{name}_{unique_id}{ext}"
        new_filepath = os.path.join(dir_name, new_filename)

        return new_filepath

    def cancel(self):
        log.info(f"Cancelling download: {self.info.title}")
        if self.running():
            try:
                self.proc.kill()
            except Exception as e:
                log.error(f"Error killing process for {self.info.title}: {e}")
        self.canceled = True
        if self.status_queue is not None:
            self.status_queue.put(None)

    def close(self):
        log.info(f"Closing download process for: {self.info.title}")
        if self.started():
            self.proc.close()
            if self.status_queue is not None:
                self.status_queue.put(None)

    def running(self):
        try:
            return self.proc is not None and self.proc.is_alive()
        except ValueError:
            return False

    def started(self):
        return self.proc is not None

    async def update_status(self):
        while True:
            status = await self.loop.run_in_executor(None, self.status_queue.get)
            if status is None:
                log.info(f"Status update finished for: {self.info.title}")
                return
            if self.canceled:
                log.info(f"Download {self.info.title} is canceled; stopping status updates.")
                return
            self.tmpfilename = status.get('tmpfilename')
            if 'filename' in status:
                fileName = status.get('filename')
                self.info.filename = os.path.relpath(fileName, self.download_dir)
                self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
                if self.info.format == 'thumbnail':
                    self.info.filename = re.sub(r'\.webm$', '.jpg', self.info.filename)
            self.info.status = status['status']
            self.info.msg = status.get('msg')
            if 'downloaded_bytes' in status:
                total = status.get('total_bytes') or status.get('total_bytes_estimate')
                if total:
                    self.info.percent = status['downloaded_bytes'] / total * 100
            self.info.speed = status.get('speed')
            self.info.eta = status.get('eta')
            log.info(f"Updating status for {self.info.title}: {status}")
            await self.notifier.updated(self.info)

class PersistentQueue:
    def __init__(self, path):
        pdir = os.path.dirname(path)
        if not os.path.isdir(pdir):
            os.mkdir(pdir)
        with shelve.open(path, 'c'):
            pass
        self.path = path
        self.dict = OrderedDict()

    def load(self):
        for k, v in self.saved_items():
            # Ensure website field is populated for older downloads
            if not hasattr(v, 'website') or v.website is None:
                parsed_url = urlparse(v.url)
                v.website = parsed_url.netloc
            # Ensure file_exists field exists
            if not hasattr(v, 'file_exists'):
                v.file_exists = None
            self.dict[k] = Download(None, None, None, None, None, None, {}, v)

    def exists(self, key):
        return key in self.dict

    def get(self, key):
        return self.dict[key]

    def items(self):
        return self.dict.items()

    def saved_items(self):
        with shelve.open(self.path, 'r') as shelf:
            return sorted(shelf.items(), key=lambda item: item[1].timestamp)

    def put(self, value):
        key = value.info.url
        self.dict[key] = value
        with shelve.open(self.path, 'w') as shelf:
            shelf[key] = value.info

    def delete(self, key):
        if key in self.dict:
            del self.dict[key]
            with shelve.open(self.path, 'w') as shelf:
                shelf.pop(key, None)

    def next(self):
        k, v = next(iter(self.dict.items()))
        return k, v

    def empty(self):
        return not bool(self.dict)

class DownloadQueue:
    def __init__(self, config, notifier):
        self.config = config
        self.notifier = notifier
        self.queue = PersistentQueue(self.config.STATE_DIR + '/queue')
        self.done = PersistentQueue(self.config.STATE_DIR + '/completed')
        self.pending = PersistentQueue(self.config.STATE_DIR + '/pending')
        self.active_downloads = set()
        self.semaphore = None
        # For sequential mode, use an asyncio lock to ensure one-at-a-time execution.
        if self.config.DOWNLOAD_MODE == 'sequential':
            self.seq_lock = asyncio.Lock()
        elif self.config.DOWNLOAD_MODE == 'limited':
            self.semaphore = asyncio.Semaphore(int(self.config.MAX_CONCURRENT_DOWNLOADS))

        # PreCheck queue for sequential conflict detection (no locks needed)
        self.precheck_queue = asyncio.Queue()
        self.reserved_filenames = set()  # Track filenames being processed
        self.precheck_in_progress = {}  # Track URL -> DownloadInfo for items in precheck queue

        # Event notifications (keep last 5 in memory)
        self.events = []  # List of {type, message, timestamp, url}
        self.max_events = 5

        self.done.load()

    async def __import_queue(self):
        for k, v in self.queue.saved_items():
            await self.__add_download(v, True)

    async def __import_pending(self):
        for k, v in self.pending.saved_items():
            await self.__add_download(v, False)

    async def initialize(self):
        log.info("Initializing DownloadQueue")
        # Start the precheck worker for sequential conflict detection
        asyncio.create_task(self.__precheck_worker())
        asyncio.create_task(self.__import_queue())
        asyncio.create_task(self.__import_pending())

    async def __precheck_worker(self):
        """Background worker that processes precheck queue sequentially.
        Sequential processing naturally prevents race conditions without locks."""
        log.info("[PreCheck] Worker started")
        while True:
            try:
                # Get next item from queue (blocks if empty)
                item = await self.precheck_queue.get()
                log.debug(f"[PreCheck] Processing item: {item['dl'].url}")

                # Process the precheck and start download
                await self.__process_precheck(item)

                # Mark task as done
                self.precheck_queue.task_done()
            except Exception as e:
                log.error(f"[PreCheck] Worker error: {e}", exc_info=True)

    async def __process_precheck(self, item):
        """Process a single download with conflict detection.
        Called sequentially by worker - no race conditions possible."""
        dl = item['dl']
        auto_start = item['auto_start']
        dldirectory = item['dldirectory']
        output = item['output']
        output_chapter = item['output_chapter']
        ytdl_options = item['ytdl_options']
        entry = item['entry']

        log.info(f"[PreCheck] Checking for filename conflicts before download")
        log.debug(f"[PreCheck] Original output template: {output}")

        # Try to predict the filename that yt-dlp will generate
        if entry and 'title' in entry:
            # Check if we have the real title or just a placeholder
            title = entry.get('title', '')
            video_id = entry.get('id', '')

            # If title looks like a placeholder (contains the ID), we need full extraction
            needs_full_extraction = (
                not title or  # No title
                title == f"twitter video #{video_id}" or  # Placeholder pattern
                video_id in title  # ID is in title (likely placeholder)
            )

            if needs_full_extraction:
                log.debug(f"[PreCheck] Title appears to be placeholder: '{title}', doing full info extraction")
                try:
                    # Do a full (non-flat) extraction to get real title
                    full_entry = await asyncio.get_running_loop().run_in_executor(
                        None,
                        lambda: yt_dlp.YoutubeDL(params={
                            'quiet': True,
                            'no_color': True,
                            'extract_flat': False,  # Full extraction
                            'skip_download': True,  # Don't download, just get info
                            'paths': {"home": dldirectory, "temp": self.config.TEMP_DIR},
                            **ytdl_options,
                        }).extract_info(dl.url, download=False)
                    )
                    if full_entry and 'title' in full_entry:
                        title = full_entry['title']
                        log.debug(f"[PreCheck] Got real title from full extraction: '{title}'")
                except Exception as e:
                    log.warning(f"[PreCheck] Failed to get full info: {e}, using placeholder title")

            predicted_filename = output
            # Replace title
            if '%(title)s' in predicted_filename:
                predicted_filename = predicted_filename.replace('%(title)s', title)

            # Replace id
            if '%(id)s' in predicted_filename and video_id:
                predicted_filename = predicted_filename.replace('%(id)s', video_id)

            # Handle ext specially - default to format's extension if not in entry
            if '%(ext)s' in predicted_filename:
                ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4')
                predicted_filename = predicted_filename.replace('%(ext)s', ext)

            predicted_filepath = os.path.join(dldirectory, predicted_filename)
            log.info(f"[PreCheck] Predicted filepath: {predicted_filepath}")

            # Check if file already exists OR is reserved by another download in queue
            # Sequential processing means we check one at a time - no race condition
            if os.path.exists(predicted_filepath) or predicted_filepath in self.reserved_filenames:
                if predicted_filepath in self.reserved_filenames:
                    log.warning(f"[PreCheck] Filename is reserved by pending download! Will append unique ID")
                else:
                    log.warning(f"[PreCheck] File already exists! Will append unique ID to avoid conflict")

                # Generate unique ID
                unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

                # Modify output template to include unique ID before extension
                # Change "%(title)s.%(ext)s" to "%(title)s_XXXXX.%(ext)s"
                if '.%(ext)s' in output:
                    output = output.replace('.%(ext)s', f'_{unique_id}.%(ext)s')
                else:
                    # Fallback: append to end
                    output = f"{output}_{unique_id}"

                # Re-predict the new filename
                predicted_filename = output
                if '%(title)s' in predicted_filename:
                    predicted_filename = predicted_filename.replace('%(title)s', title)
                if '%(id)s' in predicted_filename and video_id:
                    predicted_filename = predicted_filename.replace('%(id)s', video_id)
                if '%(ext)s' in predicted_filename:
                    ext = entry.get('ext', dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4')
                    predicted_filename = predicted_filename.replace('%(ext)s', ext)
                predicted_filepath = os.path.join(dldirectory, predicted_filename)

                log.info(f"[PreCheck] Modified output template: {output}")
                log.info(f"[PreCheck] New predicted filepath: {predicted_filepath}")
            else:
                log.info(f"[PreCheck] No conflict detected, using original template")

            # Reserve this filename to prevent concurrent downloads from using it
            self.reserved_filenames.add(predicted_filepath)
            log.debug(f"[PreCheck] Reserved filename: {predicted_filepath}")
        else:
            predicted_filepath = None
            log.debug(f"[PreCheck] No entry data available, skipping pre-check")

        log.debug(f"final resolved output template: {output}")
        download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl)

        # Store the reserved filepath for cleanup
        download.reserved_filepath = predicted_filepath

        # Remove from in-progress set before adding to queue
        # This allows checking queue.exists() to work properly
        if dl.url in self.precheck_in_progress:
            del self.precheck_in_progress[dl.url]
            log.debug(f"[PreCheck] Removed from in-progress tracking: {dl.url}")

        if auto_start is True:
            self.queue.put(download)
            asyncio.create_task(self.__start_download(download))
        else:
            self.pending.put(download)

    async def __start_download(self, download):
        if download.canceled:
            log.info(f"Download {download.info.title} was canceled, skipping start.")
            return
        if self.config.DOWNLOAD_MODE == 'sequential':
            async with self.seq_lock:
                log.info("Starting sequential download.")
                await download.start(self.notifier)
                self._post_download_cleanup(download)
        elif self.config.DOWNLOAD_MODE == 'limited' and self.semaphore is not None:
            await self.__limited_concurrent_download(download)
        else:
            await self.__concurrent_download(download)

    async def __concurrent_download(self, download):
        log.info("Starting concurrent download without limits.")
        asyncio.create_task(self._run_download(download))

    async def __limited_concurrent_download(self, download):
        log.info("Starting limited concurrent download.")
        async with self.semaphore:
            await self._run_download(download)

    async def _run_download(self, download):
        if download.canceled:
            log.info(f"Download {download.info.title} is canceled; skipping start.")
            return
        await download.start(self.notifier)
        self._post_download_cleanup(download)

    def _post_download_cleanup(self, download):
        # Release filename reservation if it exists
        if hasattr(download, 'reserved_filepath') and download.reserved_filepath:
            if download.reserved_filepath in self.reserved_filenames:
                self.reserved_filenames.discard(download.reserved_filepath)
                log.debug(f"[PreCheck] Released reservation for: {download.reserved_filepath}")

        if download.info.status != 'finished':
            if download.tmpfilename and os.path.isfile(download.tmpfilename):
                try:
                    os.remove(download.tmpfilename)
                except:
                    pass
            download.info.status = 'error'
        download.close()
        if self.queue.exists(download.info.url):
            self.queue.delete(download.info.url)
            if download.canceled:
                asyncio.create_task(self.notifier.canceled(download.info.url))
            else:
                self.done.put(download)
                asyncio.create_task(self.notifier.completed(download.info))

    def __extract_info(self, url, playlist_strict_mode):
        return yt_dlp.YoutubeDL(params={
            'quiet': True,
            'no_color': True,
            'extract_flat': True,
            'ignore_no_formats_error': True,
            'noplaylist': playlist_strict_mode,
            'paths': {"home": self.config.DOWNLOAD_DIR, "temp": self.config.TEMP_DIR},
            **self.config.YTDL_OPTIONS,
            **({'impersonate': yt_dlp.networking.impersonate.ImpersonateTarget.from_str(self.config.YTDL_OPTIONS['impersonate'])} if 'impersonate' in self.config.YTDL_OPTIONS else {}),
        }).extract_info(url, download=False)

    def __calc_download_path(self, quality, format, folder):
        base_directory = self.config.DOWNLOAD_DIR if (quality != 'audio' and format not in AUDIO_FORMATS) else self.config.AUDIO_DOWNLOAD_DIR
        if folder:
            if not self.config.CUSTOM_DIRS:
                return None, {'status': 'error', 'msg': f'A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.'}
            dldirectory = os.path.realpath(os.path.join(base_directory, folder))
            real_base_directory = os.path.realpath(base_directory)
            if not dldirectory.startswith(real_base_directory):
                return None, {'status': 'error', 'msg': f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"'}
            if not os.path.isdir(dldirectory):
                if not self.config.CREATE_CUSTOM_DIRS:
                    return None, {'status': 'error', 'msg': f'Folder "{folder}" for download does not exist inside base directory "{real_base_directory}", and CREATE_CUSTOM_DIRS is not true in the configuration.'}
                os.makedirs(dldirectory, exist_ok=True)
        else:
            dldirectory = base_directory
        return dldirectory, None

    async def __add_download(self, dl, auto_start):
        """Fast path: validate and queue for precheck processing.
        Returns immediately without blocking on slow operations."""
        # Check if this exact URL is already being processed, in queue, or already downloaded
        # This prevents duplicate downloads when same URL is submitted multiple times
        if (dl.url in self.precheck_in_progress or
            self.queue.exists(dl.url) or
            self.pending.exists(dl.url) or
            self.done.exists(dl.url)):
            log.info(f"[PreCheck] URL already queued/processing/downloaded, skipping: {dl.url}")
            # Add event notification
            self._add_event('duplicate_skipped', 'URL already in queue or downloaded', dl.url)
            return {'status': 'ok', 'msg': 'Download already exists'}

        dldirectory, error_message = self.__calc_download_path(dl.quality, dl.format, dl.folder)
        if error_message is not None:
            return error_message

        output = self.config.OUTPUT_TEMPLATE if len(dl.custom_name_prefix) == 0 else f'{dl.custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}'
        output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER
        entry = getattr(dl, 'entry', None)

        if entry is not None and 'playlist' in entry and entry['playlist'] is not None:
            if len(self.config.OUTPUT_TEMPLATE_PLAYLIST):
                output = self.config.OUTPUT_TEMPLATE_PLAYLIST
            for property, value in entry.items():
                if property.startswith("playlist"):
                    output = output.replace(f"%({property})s", str(value))

        ytdl_options = dict(self.config.YTDL_OPTIONS)
        playlist_item_limit = getattr(dl, 'playlist_item_limit', 0)
        if playlist_item_limit > 0:
            log.info(f'playlist limit is set. Processing only first {playlist_item_limit} entries')
            ytdl_options['playlistend'] = playlist_item_limit

        # Check if cookie file exists for this domain
        parsed_url = urlparse(dl.url)
        domain = parsed_url.netloc
        log.info(f"[Cookie] Checking for cookie file for domain: {domain}")

        cookies_dir = os.path.join(self.config.STATE_DIR, 'cookies')
        log.debug(f"[Cookie] Cookies directory: {cookies_dir}")

        # Try domain-specific cookie file
        safe_domain = domain.replace(':', '_').replace('/', '_')
        cookie_file = os.path.join(cookies_dir, f'{safe_domain}.txt')

        log.debug(f"[Cookie] Looking for cookie file at: {cookie_file}")

        if os.path.exists(cookie_file):
            log.info(f"[Cookie] Found cookie file: {cookie_file}")
            # Verify file is readable and has content
            try:
                with open(cookie_file, 'r') as f:
                    lines = f.readlines()
                    cookie_lines = [l for l in lines if l.strip() and not l.startswith('#')]
                    log.info(f"[Cookie] Cookie file contains {len(cookie_lines)} cookie entries")
                    if len(cookie_lines) == 0:
                        log.warning(f"[Cookie] Cookie file exists but contains no cookies!")
                    else:
                        log.debug(f"[Cookie] First cookie entry: {cookie_lines[0][:50]}...")
            except Exception as e:
                log.error(f"[Cookie] Error reading cookie file: {e}", exc_info=True)

            ytdl_options['cookiefile'] = cookie_file
            log.info(f"[Cookie] Configured yt-dlp to use cookiefile: {cookie_file}")
        else:
            log.info(f"[Cookie] No cookie file found for domain {domain}")
            log.debug(f"[Cookie] Checked path: {cookie_file}")
            # List available cookie files for debugging
            if os.path.exists(cookies_dir):
                available_cookies = os.listdir(cookies_dir)
                if available_cookies:
                    log.debug(f"[Cookie] Available cookie files: {available_cookies}")
                else:
                    log.debug(f"[Cookie] Cookies directory is empty")
            else:
                log.debug(f"[Cookie] Cookies directory does not exist")

        # Mark URL as being processed to prevent duplicates
        # Store the DownloadInfo so we can display it in UI
        self.precheck_in_progress[dl.url] = dl

        # Queue for sequential precheck processing (fast, non-blocking)
        await self.precheck_queue.put({
            'dl': dl,
            'auto_start': auto_start,
            'dldirectory': dldirectory,
            'output': output,
            'output_chapter': output_chapter,
            'ytdl_options': ytdl_options,
            'entry': entry,
        })
        log.debug(f"[PreCheck] Queued for processing: {dl.url}")

        # Notify immediately (fast response to user)
        await self.notifier.added(dl)

    async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already):
        if not entry:
            return {'status': 'error', 'msg': "Invalid/empty data was given."}

        error = None
        if "live_status" in entry and "release_timestamp" in entry and entry.get("live_status") == "is_upcoming":
            dt_ts = datetime.fromtimestamp(entry.get("release_timestamp")).strftime('%Y-%m-%d %H:%M:%S %z')
            error = f"Live stream is scheduled to start at {dt_ts}"
        else:
            if "msg" in entry:
                error = entry["msg"]

        etype = entry.get('_type') or 'video'

        if etype.startswith('url'):
            log.debug('Processing as an url')
            return await self.add(entry['url'], quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already)
        elif etype == 'playlist':
            log.debug('Processing as a playlist')
            entries = entry['entries']
            log.info(f'playlist detected with {len(entries)} entries')

            playlist_index_digits = len(str(len(entries)))
            results = []
            if playlist_item_limit > 0:
                log.info(f'Playlist item limit is set. Processing only first {playlist_item_limit} entries')
                entries = entries[:playlist_item_limit]

            # Verify playlist entry has 'id' before using it
            playlist_id = entry.get("id", "unknown_playlist")
            if "id" not in entry:
                log.warning(f"Playlist entry missing 'id' field. Using fallback 'unknown_playlist'. Entry keys: {list(entry.keys())}")

            for index, etr in enumerate(entries, start=1):
                etr["_type"] = "video"
                etr["playlist"] = playlist_id
                etr["playlist_index"] = '{{0:0{0:d}d}}'.format(playlist_index_digits).format(index)
                for property in ("id", "title", "uploader", "uploader_id"):
                    if property in entry:
                        etr[f"playlist_{property}"] = entry[property]
                results.append(await self.__add_entry(etr, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already))
            if any(res['status'] == 'error' for res in results):
                return {'status': 'error', 'msg': ', '.join(res['msg'] for res in results if res['status'] == 'error' and 'msg' in res)}
            return {'status': 'ok'}
        elif etype == 'video' or (etype.startswith('url') and 'id' in entry and 'title' in entry):
            log.debug('Processing as a video')

            # Extract ID from entry, or derive from URL if missing
            video_id = entry.get('id')
            if not video_id:
                # Try to extract ID from URL (e.g., viewkey parameter or URL path)
                video_url = entry.get('url', '')
                if 'viewkey=' in video_url:
                    # Extract viewkey parameter (common in PornHub, etc.)
                    match = re.search(r'viewkey=([^&]+)', video_url)
                    if match:
                        video_id = match.group(1)
                        log.info(f"Extracted video ID from viewkey: {video_id}")
                elif 'webpage_url' in entry:
                    # Use webpage_url as fallback
                    video_id = entry['webpage_url']
                else:
                    # Last resort: use the URL itself
                    video_id = video_url

                if not video_id:
                    log.error(f"Video entry missing 'id' field and could not extract from URL. Entry keys: {list(entry.keys())}")
                    return {'status': 'error', 'msg': "Video entry missing required 'id' field and URL extraction failed"}

            key = entry.get('webpage_url') or entry['url']
            if not self.queue.exists(key):
                dl = DownloadInfo(video_id, entry.get('title') or video_id, key, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit)
                await self.__add_download(dl, auto_start)
            return {'status': 'ok'}
        return {'status': 'error', 'msg': f'Unsupported resource "{etype}"'}

    async def add(self, url, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start=True, already=None):
        log.info(f'adding {url}: {quality=} {format=} {already=} {folder=} {custom_name_prefix=} {playlist_strict_mode=} {playlist_item_limit=} {auto_start=}')
        already = set() if already is None else already
        if url in already:
            log.info('recursion detected, skipping')
            return {'status': 'ok'}
        else:
            already.add(url)
        try:
            entry = await asyncio.get_running_loop().run_in_executor(None, self.__extract_info, url, playlist_strict_mode)
        except yt_dlp.utils.YoutubeDLError as exc:
            return {'status': 'error', 'msg': str(exc)}
        return await self.__add_entry(entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already)

    async def start_pending(self, ids):
        for id in ids:
            if not self.pending.exists(id):
                log.warn(f'requested start for non-existent download {id}')
                continue
            dl = self.pending.get(id)
            self.queue.put(dl)
            self.pending.delete(id)
            asyncio.create_task(self.__start_download(dl))
        return {'status': 'ok'}

    async def cancel(self, ids):
        for id in ids:
            if self.pending.exists(id):
                self.pending.delete(id)
                await self.notifier.canceled(id)
                continue
            if not self.queue.exists(id):
                log.warn(f'requested cancel for non-existent download {id}')
                continue
            if self.queue.get(id).started():
                self.queue.get(id).cancel()
            else:
                self.queue.delete(id)
                await self.notifier.canceled(id)
        return {'status': 'ok'}

    async def clear(self, ids):
        for id in ids:
            if not self.done.exists(id):
                log.warn(f'requested delete for non-existent download {id}')
                continue
            if self.config.DELETE_FILE_ON_TRASHCAN:
                dl = self.done.get(id)
                try:
                    dldirectory, _ = self.__calc_download_path(dl.info.quality, dl.info.format, dl.info.folder)
                    os.remove(os.path.join(dldirectory, dl.info.filename))
                except Exception as e:
                    log.warn(f'deleting file for download {id} failed with error message {e!r}')
            self.done.delete(id)
            await self.notifier.cleared(id)
        return {'status': 'ok'}

    def get(self):
        # Ensure website field is populated for all downloads
        for k, v in self.queue.items():
            if not hasattr(v.info, 'website') or v.info.website is None:
                parsed_url = urlparse(v.info.url)
                v.info.website = parsed_url.netloc

        for k, v in self.pending.items():
            if not hasattr(v.info, 'website') or v.info.website is None:
                parsed_url = urlparse(v.info.url)
                v.info.website = parsed_url.netloc

        # Update file existence status for done downloads
        for k, v in self.done.items():
            if not hasattr(v.info, 'website') or v.info.website is None:
                parsed_url = urlparse(v.info.url)
                v.info.website = parsed_url.netloc

            # Use getattr with default to safely check for filename attribute
            filename = getattr(v.info, 'filename', None)
            if filename:
                dldirectory, _ = self.__calc_download_path(v.info.quality, v.info.format, v.info.folder)
                if dldirectory:
                    filepath = os.path.join(dldirectory, filename)
                    v.info.file_exists = os.path.exists(filepath)
                else:
                    v.info.file_exists = False
            else:
                v.info.file_exists = False

        # Create list from items in precheck queue
        # These items have 'preparing' status to indicate they're being analyzed
        precheck_list = [(dl.url, dl) for dl in self.precheck_in_progress.values()]

        return (precheck_list +
                list((k, v.info) for k, v in self.queue.items()) +
                list((k, v.info) for k, v in self.pending.items()),
                list((k, v.info) for k, v in self.done.items()))

    def _add_event(self, event_type, message, url=None):
        """Add an event to the events list (keep only last 5)."""
        event = {
            'type': event_type,
            'message': message,
            'timestamp': int(time.time()),
            'url': url
        }
        self.events.append(event)
        # Keep only last 5 events
        if len(self.events) > self.max_events:
            self.events = self.events[-self.max_events:]
        # Notify frontend via WebSocket
        asyncio.create_task(self.notifier.event(event))

    def get_events(self):
        """Get all events (last 5)."""
        return self.events

    def clear_events(self):
        """Clear all events."""
        self.events = []