913 lines
44 KiB
Python
913 lines
44 KiB
Python
import os
|
|
import yt_dlp
|
|
from collections import OrderedDict
|
|
import shelve
|
|
import time
|
|
import asyncio
|
|
import multiprocessing
|
|
import logging
|
|
import re
|
|
import random
|
|
import string
|
|
from urllib.parse import urlparse
|
|
|
|
import yt_dlp.networking.impersonate
|
|
from dl_formats import get_format, get_opts, AUDIO_FORMATS
|
|
from datetime import datetime
|
|
|
|
# Module-level logger used throughout this file; records go to the 'ytdl' logger.
log = logging.getLogger('ytdl')
|
|
|
|
class DownloadQueueNotifier:
    """Callback interface used to report queue and download state changes.

    Every hook is a coroutine that raises ``NotImplementedError`` here, so a
    concrete notifier has to override the hooks it wants to receive.
    """

    async def added(self, dl):
        """Hook awaited with the download record that was just accepted."""
        raise NotImplementedError

    async def updated(self, dl):
        """Hook awaited with a download record whose status/progress changed."""
        raise NotImplementedError

    async def completed(self, dl):
        """Hook invoked with the record of a download that left the queue."""
        raise NotImplementedError

    async def canceled(self, id):
        """Hook invoked with the key of a download that was canceled."""
        raise NotImplementedError

    async def cleared(self, id):
        """Hook for a cleared download key (the caller is not visible in this file section)."""
        raise NotImplementedError

    async def event(self, event):
        """Hook for a generic event payload (the caller is not visible in this file section)."""
        raise NotImplementedError
|
|
|
|
class DownloadInfo:
    """Plain record describing one requested download and its current progress.

    ``PersistentQueue.put`` stores the ``info`` attribute of a download in a
    shelf, so instances are expected to stay picklable.
    """

    def __init__(self, id, title, url, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit):
        """Initialise the record in the ``"pending"`` state.

        Args:
            id: Identifier of the media item.
            title: Human-readable title.
            url: Source URL; its network location is stored as ``website``.
            quality: Requested quality selector.
            format: Requested output format.
            folder: Optional sub-folder for the download.
            custom_name_prefix: Optional prefix; when non-empty it is prepended
                (followed by a dot) to both ``id`` and ``title``.  ``None`` is
                treated like an empty prefix and stored as ``''``.
            error: Error message known at creation time, or ``None``.
            entry: Raw extractor entry this record was built from.
            playlist_item_limit: Maximum number of playlist items to process.
        """
        # Normalise a missing prefix so later len()/formatting never sees None.
        prefix = custom_name_prefix or ''
        self.id = f'{prefix}.{id}' if prefix else id
        self.title = f'{prefix}.{title}' if prefix else title
        self.url = url
        self.quality = quality
        self.format = format
        self.folder = folder
        self.custom_name_prefix = prefix
        # Progress fields stay unset until status updates arrive.
        self.msg = self.percent = self.speed = self.eta = None
        self.status = "pending"
        self.size = None
        # Creation time in nanoseconds; PersistentQueue.saved_items sorts by it.
        self.timestamp = time.time_ns()
        self.error = error
        self.entry = entry
        self.playlist_item_limit = playlist_item_limit
        # Website domain is the network location of the URL.
        self.website = urlparse(url).netloc
        self.file_exists = None
|
|
|
|
class Download:
    """A single yt-dlp download that runs in its own child process.

    ``start()`` spawns a process executing ``_download()`` and a task running
    ``update_status()``.  The child reports progress as plain dicts on
    ``status_queue``; a ``None`` item on that queue tells ``update_status()``
    to stop.
    """

    # Shared multiprocessing.Manager, created on the first start(); it
    # supplies the per-download status queues.
    manager = None

    def __init__(self, download_dir, temp_dir, output_template, output_template_chapter, quality, format, ytdl_opts, info):
        """Store the download settings and resolve the yt-dlp format/options.

        ``format``, ``quality`` and ``ytdl_opts`` are handed to ``get_format``
        and ``get_opts``; ``info`` is the record that ``update_status()``
        mutates and passes to the notifier.
        """
        self.download_dir = download_dir
        self.temp_dir = temp_dir
        self.output_template = output_template
        self.output_template_chapter = output_template_chapter
        self.format = get_format(format, quality)
        self.ytdl_opts = get_opts(format, quality, ytdl_opts)
        if "impersonate" in self.ytdl_opts:
            # yt-dlp is given an ImpersonateTarget object instead of the raw string.
            self.ytdl_opts["impersonate"] = yt_dlp.networking.impersonate.ImpersonateTarget.from_str(self.ytdl_opts["impersonate"])
        self.info = info
        self.canceled = False
        self.tmpfilename = None
        self.status_queue = None
        self.proc = None
        self.loop = None
        self.notifier = None
        self.had_download = False  # Track if actual download occurred
        self._status_task = None  # Strong reference to the update_status() task

    def _put_status(self, st):
        """yt-dlp progress hook: trace the update and forward it on the status queue."""
        status_type = st.get('status', 'unknown')
        if status_type == 'downloading':
            # Mark that we're actually downloading (not skipping)
            self.had_download = True
            if 'tmpfilename' in st:
                log.debug(f"[TRACE] Downloading - tmpfile: {st.get('tmpfilename')}")
        elif status_type == 'finished':
            log.info(f"[TRACE] put_status FINISHED - filename: {st.get('filename')}, tmpfilename: {st.get('tmpfilename')}")
            log.info(f"[TRACE] had_download flag: {self.had_download}")
            if st.get('filename'):
                exists = os.path.exists(st['filename'])
                log.info(f"[TRACE] File exists at reported location? {exists}")
                if exists:
                    log.info(f"[TRACE] File size: {os.path.getsize(st['filename'])} bytes")
        elif status_type == 'error':
            log.error(f"[TRACE] put_status ERROR - msg: {st.get('msg')}")

        # Only this subset of the hook payload is sent to the parent process.
        self.status_queue.put({k: v for k, v in st.items() if k in (
            'tmpfilename',
            'filename',
            'status',
            'msg',
            'total_bytes',
            'total_bytes_estimate',
            'downloaded_bytes',
            'speed',
            'eta',
        )})

    def _put_status_postprocessor(self, d):
        """yt-dlp postprocessor hook: report the final file once MoveFiles finished."""
        log.info("[TRACE] ===== POSTPROCESSOR CALLED =====")
        log.info(f"[TRACE] Postprocessor: {d.get('postprocessor')}, Status: {d.get('status')}")

        if not (d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished'):
            log.debug(f"[TRACE] Other postprocessor: {d.get('postprocessor')}")
            return

        info_dict = d['info_dict']
        log.info("[TRACE] MoveFiles postprocessor triggered")
        log.info(f"[TRACE] had_download flag in postprocessor: {self.had_download}")
        log.info(f"[TRACE] info_dict keys: {list(info_dict.keys())}")
        log.info(f"[TRACE] info_dict filepath: {info_dict.get('filepath')}")
        log.info(f"[TRACE] info_dict __finaldir: {info_dict.get('__finaldir')}")

        if '__finaldir' in info_dict:
            filename = os.path.join(info_dict['__finaldir'], os.path.basename(info_dict['filepath']))
        else:
            filename = info_dict['filepath']

        log.info(f"[TRACE] Resolved filename: {filename}")
        log.info(f"[TRACE] File exists? {os.path.exists(filename)}")

        # List files in directory
        dir_name = os.path.dirname(filename)
        if os.path.isdir(dir_name):
            log.info(f"[TRACE] Files in {dir_name}: {os.listdir(dir_name)}")

        base_name = os.path.basename(filename)
        if not os.path.exists(filename):
            log.info("[TRACE] File NOT FOUND at expected location")
            self.status_queue.put({'status': 'error', 'msg': f'File not found: {base_name}'})
            return

        log.info("[TRACE] File FOUND at expected location")

        # If yt-dlp didn't actually download (skipped), just report the existing file
        if not self.had_download:
            log.info("[TRACE] No actual download occurred - yt-dlp reused existing file")
            log.info(f"[TRACE] Sending status with existing filename: {filename}")
            self.status_queue.put({'status': 'finished', 'filename': filename})
            return

        # Actual download happened - check for conflicts
        log.info("[TRACE] Actual download occurred - checking for conflicts")
        name, ext = os.path.splitext(base_name)

        # Look for other files with same base name (excluding current file).
        # NOTE(review): splitext(x)[0] + splitext(x)[1] == x, so an entry whose
        # stem and extension both match IS the current file, which is skipped
        # first; as written this scan can never find a conflict. Kept unchanged
        # until the intended matching rule is confirmed.
        other_files = []
        if os.path.isdir(dir_name):
            for existing_file in os.listdir(dir_name):
                if existing_file == base_name:
                    log.debug(f"[TRACE] Skipping current file: {existing_file}")
                    continue  # Skip the current file
                existing_name, existing_ext = os.path.splitext(existing_file)
                # Check for exact name match
                if existing_ext == ext and existing_name == name:
                    log.info(f"[TRACE] Found matching file: {existing_file}")
                    other_files.append(existing_file)

        log.info(f"[TRACE] Found {len(other_files)} other files with same base name: {other_files}")

        # If other files exist with same name, we have a duplicate - rename the NEW file
        if other_files:
            log.info(f"[TRACE] CONFLICT DETECTED! Other files: {other_files}")
            new_filepath = self._resolve_filename_conflict(filename)
            new_filename = os.path.basename(new_filepath)

            log.info(f"[TRACE] Attempting rename: {filename} -> {new_filepath}")
            try:
                os.rename(filename, new_filepath)
            except OSError as e:
                # Keep reporting the original path when the rename fails.
                log.error(f"[TRACE] Rename FAILED: {e}")
                log.error(f"Failed to rename file due to conflict: {e}")
            else:
                log.warning(f"Filename conflict detected. Renamed: {base_name} → {new_filename}")
                log.info("[TRACE] Rename successful")
                filename = new_filepath
        else:
            log.info("[TRACE] No conflict - this is the only file with this name")

        log.info(f"[TRACE] Sending status with filename: {filename}")
        self.status_queue.put({'status': 'finished', 'filename': filename})

    def _download(self):
        """Run yt-dlp for ``self.info.url``; this is the child-process target.

        The outcome is reported on ``status_queue``: ``'finished'`` when yt-dlp
        returns 0, ``'error'`` otherwise or when a ``YoutubeDLError`` is raised.
        """
        log.info(f"Starting download for: {self.info.title} ({self.info.url})")
        log.info(f"[TRACE] Download config: download_dir={self.download_dir}, temp_dir={self.temp_dir}")
        log.info(f"[TRACE] Output template: {self.output_template}")
        try:
            ret = yt_dlp.YoutubeDL(params={
                'quiet': True,
                'no_color': True,
                'paths': {"home": self.download_dir, "temp": self.temp_dir},
                'outtmpl': {"default": self.output_template, "chapter": self.output_template_chapter},
                'format': self.format,
                'socket_timeout': 30,
                'ignore_no_formats_error': True,
                'progress_hooks': [self._put_status],
                'postprocessor_hooks': [self._put_status_postprocessor],
                **self.ytdl_opts,
            }).download([self.info.url])
            self.status_queue.put({'status': 'finished' if ret == 0 else 'error'})
            log.info(f"Finished download for: {self.info.title}")
        except yt_dlp.utils.YoutubeDLError as exc:
            log.error(f"Download error for {self.info.title}: {str(exc)}")
            self.status_queue.put({'status': 'error', 'msg': str(exc)})

    async def start(self, notifier):
        """Spawn the download process and wait (in an executor) until it exits.

        Sets ``info.status`` to ``'preparing'``, notifies ``notifier.updated``
        and starts the ``update_status()`` task before waiting for the process.
        """
        log.info(f"Preparing download for: {self.info.title}")
        if Download.manager is None:
            Download.manager = multiprocessing.Manager()
        self.status_queue = Download.manager.Queue()
        self.proc = multiprocessing.Process(target=self._download)
        self.proc.start()
        self.loop = asyncio.get_running_loop()
        self.notifier = notifier
        self.info.status = 'preparing'
        await self.notifier.updated(self.info)
        # The event loop only keeps weak references to tasks; hold a strong
        # one so the status pump cannot be garbage-collected mid-download.
        self._status_task = asyncio.create_task(self.update_status())
        return await self.loop.run_in_executor(None, self.proc.join)

    def _resolve_filename_conflict(self, filepath):
        """
        Resolve filename conflicts by appending a short unique ID.
        Returns a path in the same directory that does not exist yet.
        """
        dir_name = os.path.dirname(filepath)
        name, ext = os.path.splitext(os.path.basename(filepath))
        alphabet = string.ascii_lowercase + string.digits
        while True:
            # Short unique ID (5 alphanumeric characters); retry on the rare
            # chance that the generated name is already taken.
            unique_id = ''.join(random.choices(alphabet, k=5))
            candidate = os.path.join(dir_name, f"{name}_{unique_id}{ext}")
            if not os.path.exists(candidate):
                return candidate

    def cancel(self):
        """Kill the process if it is running, mark the download canceled and stop the status loop."""
        log.info(f"Cancelling download: {self.info.title}")
        if self.running():
            try:
                self.proc.kill()
            except Exception as e:
                log.error(f"Error killing process for {self.info.title}: {e}")
        self.canceled = True
        if self.status_queue is not None:
            # None is the sentinel that ends update_status().
            self.status_queue.put(None)

    def close(self):
        """Release the process object (if one was created) and stop the status loop."""
        log.info(f"Closing download process for: {self.info.title}")
        if self.started():
            self.proc.close()
        if self.status_queue is not None:
            self.status_queue.put(None)

    def running(self):
        """Return True while the process exists and is alive."""
        try:
            return self.proc is not None and self.proc.is_alive()
        except ValueError:
            # is_alive() raised ValueError (e.g. process object already closed).
            return False

    def started(self):
        """Return True once a process object has been created."""
        return self.proc is not None

    async def update_status(self):
        """Copy status dicts from the queue into ``self.info`` and notify the notifier.

        Ends when a ``None`` item is received or the download was canceled.
        """
        while True:
            # Queue.get blocks, so it is run in the default executor.
            status = await self.loop.run_in_executor(None, self.status_queue.get)
            if status is None:
                log.info(f"Status update finished for: {self.info.title}")
                return
            if self.canceled:
                log.info(f"Download {self.info.title} is canceled; stopping status updates.")
                return
            self.tmpfilename = status.get('tmpfilename')
            if 'filename' in status:
                fileName = status.get('filename')
                # Stored relative to the download directory.
                self.info.filename = os.path.relpath(fileName, self.download_dir)
                self.info.size = os.path.getsize(fileName) if os.path.exists(fileName) else None
                if self.info.format == 'thumbnail':
                    self.info.filename = re.sub(r'\.webm$', '.jpg', self.info.filename)
            self.info.status = status['status']
            self.info.msg = status.get('msg')
            if 'downloaded_bytes' in status:
                total = status.get('total_bytes') or status.get('total_bytes_estimate')
                if total:
                    self.info.percent = status['downloaded_bytes'] / total * 100
            self.info.speed = status.get('speed')
            self.info.eta = status.get('eta')
            log.info(f"Updating status for {self.info.title}: {status}")
            await self.notifier.updated(self.info)
|
|
|
|
class PersistentQueue:
    """Insertion-ordered mapping of downloads mirrored to a ``shelve`` file.

    The in-memory ``dict`` holds the download objects keyed by URL; the shelf
    at ``path`` stores only each download's ``info`` record.
    """

    def __init__(self, path):
        """Create the shelf at *path* (and its parent directories) if missing."""
        pdir = os.path.dirname(path)
        # makedirs handles nested state directories; skip it entirely when
        # *path* has no directory component (dirname == '').
        if pdir:
            os.makedirs(pdir, exist_ok=True)
        with shelve.open(path, 'c'):
            pass
        self.path = path
        self.dict = OrderedDict()

    def load(self):
        """Populate the in-memory dict from the shelf, oldest record first."""
        for k, v in self.saved_items():
            # Ensure website field is populated for older downloads
            if not hasattr(v, 'website') or v.website is None:
                v.website = urlparse(v.url).netloc
            # Ensure file_exists field exists
            if not hasattr(v, 'file_exists'):
                v.file_exists = None
            # Loaded entries are wrapped in a Download with no paths/format.
            self.dict[k] = Download(None, None, None, None, None, None, {}, v)

    def exists(self, key):
        """Return True if *key* is present in memory."""
        return key in self.dict

    def get(self, key):
        """Return the in-memory download for *key* (raises ``KeyError`` if absent)."""
        return self.dict[key]

    def items(self):
        """Return the in-memory ``(key, download)`` view."""
        return self.dict.items()

    def saved_items(self):
        """Return the shelved ``(key, info)`` pairs sorted by ``info.timestamp``."""
        with shelve.open(self.path, 'r') as shelf:
            return sorted(shelf.items(), key=lambda item: item[1].timestamp)

    def put(self, value):
        """Store *value* in memory and its ``info`` in the shelf, keyed by ``info.url``."""
        key = value.info.url
        self.dict[key] = value
        with shelve.open(self.path, 'w') as shelf:
            shelf[key] = value.info

    def delete(self, key):
        """Remove *key* from memory and from the shelf; missing keys are ignored."""
        if key in self.dict:
            del self.dict[key]
        with shelve.open(self.path, 'w') as shelf:
            shelf.pop(key, None)

    def next(self):
        """Return the first ``(key, download)`` pair; raises ``StopIteration`` when empty."""
        k, v = next(iter(self.dict.items()))
        return k, v

    def empty(self):
        """Return True when no download is held in memory."""
        return not bool(self.dict)
|
|
|
|
class DownloadQueue:
|
|
def __init__(self, config, notifier):
    """Set up the persistent stores, concurrency primitives and precheck state.

    Three ``PersistentQueue`` stores are opened under ``config.STATE_DIR``
    (``queue``, ``completed``, ``pending``); only the completed store is
    loaded here.
    """
    self.config = config
    self.notifier = notifier

    self.queue = PersistentQueue(self.config.STATE_DIR + '/queue')
    self.done = PersistentQueue(self.config.STATE_DIR + '/completed')
    self.pending = PersistentQueue(self.config.STATE_DIR + '/pending')

    self.active_downloads = set()
    self.semaphore = None
    download_mode = self.config.DOWNLOAD_MODE
    if download_mode == 'sequential':
        # Sequential mode: one lock serialises the downloads.
        self.seq_lock = asyncio.Lock()
    elif download_mode == 'limited':
        # Limited mode: cap concurrency at MAX_CONCURRENT_DOWNLOADS.
        self.semaphore = asyncio.Semaphore(int(self.config.MAX_CONCURRENT_DOWNLOADS))

    # Precheck pipeline: items are consumed one at a time by the worker.
    self.precheck_queue = asyncio.Queue()
    self.reserved_filenames = set()   # file paths claimed by downloads being processed
    self.precheck_in_progress = {}    # URL -> DownloadInfo for items waiting in the precheck queue

    # Event notifications: list of {type, message, timestamp, url}; at most 5 are kept.
    self.events = []
    self.max_events = 5

    self.done.load()
|
|
|
|
async def __import_queue(self):
|
|
for k, v in self.queue.saved_items():
|
|
await self.__add_download(v, True)
|
|
|
|
async def __import_pending(self):
|
|
for k, v in self.pending.saved_items():
|
|
await self.__add_download(v, False)
|
|
|
|
async def initialize(self):
    """Start the precheck worker and re-import the saved queue and pending items.

    The three coroutines are scheduled as background tasks; this method does
    not wait for them.
    """
    log.info("Initializing DownloadQueue")
    # asyncio keeps only weak references to tasks, so hold strong ones here;
    # otherwise the long-lived precheck worker could be garbage-collected.
    tasks = getattr(self, '_background_tasks', None)
    if tasks is None:
        tasks = self._background_tasks = set()
    for coro in (
        self.__precheck_worker(),  # sequential conflict detection
        self.__import_queue(),
        self.__import_pending(),
    ):
        task = asyncio.create_task(coro)
        tasks.add(task)
        task.add_done_callback(tasks.discard)
|
|
|
|
async def __precheck_worker(self):
    """Background worker that processes the precheck queue one item at a time.

    A failure while processing an item is logged and does not stop the
    worker.  The item is always marked done, and on failure its URL is
    removed from ``precheck_in_progress`` so it can be submitted again.
    """
    log.info("[PreCheck] Worker started")
    while True:
        # Get next item from queue (waits while the queue is empty)
        item = await self.precheck_queue.get()
        try:
            log.debug(f"[PreCheck] Processing item: {item['dl'].url}")
            # Process the precheck and start download
            await self.__process_precheck(item)
        except Exception as e:
            log.error(f"[PreCheck] Worker error: {e}", exc_info=True)
            # Without this the URL would stay "in progress" forever and every
            # later submission of it would be rejected as a duplicate.
            dl = item.get('dl') if isinstance(item, dict) else None
            url = getattr(dl, 'url', None)
            if url is not None:
                self.precheck_in_progress.pop(url, None)
        finally:
            # Mark task as done even when processing failed, so that
            # precheck_queue.join() cannot hang.
            self.precheck_queue.task_done()
|
|
|
|
async def __process_precheck(self, item):
    """Predict the output file of one queued item, avoid name clashes, then enqueue it.

    *item* is the dict built by ``__add_download``.  When the entry carries a
    title, the output template is expanded by plain substitution of
    ``%(title)s``, ``%(id)s`` and ``%(ext)s``.  If the predicted path exists
    or is already reserved, a 5-character random suffix is added to the
    template.  The resulting ``Download`` goes to the active queue (and is
    started) when ``auto_start`` is True, otherwise to the pending store.
    """
    dl = item['dl']
    auto_start = item['auto_start']
    dldirectory = item['dldirectory']
    output = item['output']
    output_chapter = item['output_chapter']
    ytdl_options = item['ytdl_options']
    entry = item['entry']

    def predict_filepath(template, title, video_id):
        """Expand the title/id/ext placeholders of *template* inside dldirectory."""
        predicted = template.replace('%(title)s', title)
        if video_id:
            predicted = predicted.replace('%(id)s', video_id)
        if '%(ext)s' in predicted:
            # Default to the requested format's extension if the entry has none.
            fallback_ext = dl.format if dl.format in ['mp4', 'mkv', 'webm', 'mp3', 'm4a'] else 'mp4'
            predicted = predicted.replace('%(ext)s', entry.get('ext') or fallback_ext)
        return os.path.join(dldirectory, predicted)

    log.info("[PreCheck] Checking for filename conflicts before download")
    log.debug(f"[PreCheck] Original output template: {output}")

    # Try to predict the filename that yt-dlp will generate
    if entry and 'title' in entry:
        # Normalise missing/None values so the string operations below cannot raise.
        title = entry.get('title') or ''
        video_id = str(entry.get('id') or '')

        # If title looks like a placeholder (contains the ID), we need full extraction
        needs_full_extraction = (
            not title  # No title
            or title == f"twitter video #{video_id}"  # Placeholder pattern
            or (bool(video_id) and video_id in title)  # ID is in title (likely placeholder)
        )

        if needs_full_extraction:
            log.debug(f"[PreCheck] Title appears to be placeholder: '{title}', doing full info extraction")
            try:
                extract_params = {
                    'quiet': True,
                    'no_color': True,
                    'extract_flat': False,  # Full extraction
                    'skip_download': True,  # Don't download, just get info
                    'paths': {"home": dldirectory, "temp": self.config.TEMP_DIR},
                    **ytdl_options,
                }
                # Same conversion the other YoutubeDL call sites in this file apply.
                if isinstance(extract_params.get('impersonate'), str):
                    extract_params['impersonate'] = yt_dlp.networking.impersonate.ImpersonateTarget.from_str(extract_params['impersonate'])
                # Do a full (non-flat) extraction to get real title
                full_entry = await asyncio.get_running_loop().run_in_executor(
                    None,
                    lambda: yt_dlp.YoutubeDL(params=extract_params).extract_info(dl.url, download=False)
                )
                if full_entry and full_entry.get('title'):
                    title = full_entry['title']
                    log.debug(f"[PreCheck] Got real title from full extraction: '{title}'")
            except Exception as e:
                log.warning(f"[PreCheck] Failed to get full info: {e}, using placeholder title")

        predicted_filepath = predict_filepath(output, title, video_id)
        log.info(f"[PreCheck] Predicted filepath: {predicted_filepath}")

        # Check if file already exists OR is reserved by another download in queue
        is_reserved = predicted_filepath in self.reserved_filenames
        if os.path.exists(predicted_filepath) or is_reserved:
            if is_reserved:
                log.warning("[PreCheck] Filename is reserved by pending download! Will append unique ID")
            else:
                log.warning("[PreCheck] File already exists! Will append unique ID to avoid conflict")

            # Generate unique ID
            unique_id = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

            # Modify output template to include unique ID before extension
            # Change "%(title)s.%(ext)s" to "%(title)s_XXXXX.%(ext)s"
            if '.%(ext)s' in output:
                output = output.replace('.%(ext)s', f'_{unique_id}.%(ext)s')
            else:
                # Fallback: append to end
                output = f"{output}_{unique_id}"

            # Re-predict the new filename
            predicted_filepath = predict_filepath(output, title, video_id)

            log.info(f"[PreCheck] Modified output template: {output}")
            log.info(f"[PreCheck] New predicted filepath: {predicted_filepath}")
        else:
            log.info("[PreCheck] No conflict detected, using original template")

        # Reserve this filename to prevent concurrent downloads from using it
        self.reserved_filenames.add(predicted_filepath)
        log.debug(f"[PreCheck] Reserved filename: {predicted_filepath}")
    else:
        predicted_filepath = None
        log.debug("[PreCheck] No entry data available, skipping pre-check")

    log.debug(f"final resolved output template: {output}")
    download = Download(dldirectory, self.config.TEMP_DIR, output, output_chapter, dl.quality, dl.format, ytdl_options, dl)

    # Store the reserved filepath for cleanup
    download.reserved_filepath = predicted_filepath

    # Remove from in-progress set before adding to queue
    # This allows checking queue.exists() to work properly
    if dl.url in self.precheck_in_progress:
        del self.precheck_in_progress[dl.url]
        log.debug(f"[PreCheck] Removed from in-progress tracking: {dl.url}")

    if auto_start is True:
        self.queue.put(download)
        asyncio.create_task(self.__start_download(download))
    else:
        self.pending.put(download)
|
|
|
|
async def __start_download(self, download):
    """Run *download* according to ``config.DOWNLOAD_MODE``; canceled downloads are skipped."""
    if download.canceled:
        log.info(f"Download {download.info.title} was canceled, skipping start.")
        return

    mode = self.config.DOWNLOAD_MODE
    if mode == 'sequential':
        # The lock is held for the whole download, including its cleanup.
        async with self.seq_lock:
            log.info("Starting sequential download.")
            await download.start(self.notifier)
            self._post_download_cleanup(download)
        return

    if mode == 'limited' and self.semaphore is not None:
        await self.__limited_concurrent_download(download)
        return

    await self.__concurrent_download(download)
|
|
|
|
async def __concurrent_download(self, download):
    """Schedule *download* as its own task, without any concurrency limit."""
    log.info("Starting concurrent download without limits.")
    runner = self._run_download(download)
    asyncio.create_task(runner)
|
|
|
|
async def __limited_concurrent_download(self, download):
    """Run *download* while holding one slot of ``self.semaphore``."""
    log.info("Starting limited concurrent download.")
    limiter = self.semaphore
    async with limiter:
        await self._run_download(download)
|
|
|
|
async def _run_download(self, download):
|
|
if download.canceled:
|
|
log.info(f"Download {download.info.title} is canceled; skipping start.")
|
|
return
|
|
await download.start(self.notifier)
|
|
self._post_download_cleanup(download)
|
|
|
|
def _post_download_cleanup(self, download):
    """Finalize *download* after its process has ended.

    Releases the filename reservation.  When the download did not reach the
    ``'finished'`` status, removes its temporary file and marks it as
    ``'error'``.  Then closes the download, drops it from the active queue,
    and either notifies cancellation or moves it to the completed store and
    notifies completion.
    """
    # Release filename reservation if it exists (downloads restored from
    # disk may not carry the attribute at all).
    reserved = getattr(download, 'reserved_filepath', None)
    if reserved and reserved in self.reserved_filenames:
        self.reserved_filenames.discard(reserved)
        log.debug(f"[PreCheck] Released reservation for: {reserved}")

    if download.info.status != 'finished':
        tmpfile = download.tmpfilename
        if tmpfile and os.path.isfile(tmpfile):
            try:
                os.remove(tmpfile)
            except OSError as exc:
                # Best effort: a leftover temp file must not block the cleanup.
                log.warning(f"Could not remove temporary file {tmpfile}: {exc}")
        download.info.status = 'error'

    download.close()

    url = download.info.url
    if self.queue.exists(url):
        self.queue.delete(url)
        if download.canceled:
            asyncio.create_task(self.notifier.canceled(url))
        else:
            self.done.put(download)
            asyncio.create_task(self.notifier.completed(download.info))
|
|
|
|
def __extract_info(self, url, playlist_strict_mode):
    """Return yt-dlp's flat (``extract_flat``) info for *url* without downloading.

    ``playlist_strict_mode`` is passed as yt-dlp's ``noplaylist`` option;
    ``config.YTDL_OPTIONS`` is applied on top of the defaults below.
    """
    user_options = self.config.YTDL_OPTIONS
    params = {
        'quiet': True,
        'no_color': True,
        'extract_flat': True,
        'ignore_no_formats_error': True,
        'noplaylist': playlist_strict_mode,
        'paths': {"home": self.config.DOWNLOAD_DIR, "temp": self.config.TEMP_DIR},
        **user_options,
    }
    if 'impersonate' in user_options:
        # Replace the configured string with an ImpersonateTarget object.
        params['impersonate'] = yt_dlp.networking.impersonate.ImpersonateTarget.from_str(user_options['impersonate'])
    extractor = yt_dlp.YoutubeDL(params=params)
    return extractor.extract_info(url, download=False)
|
|
|
|
def __calc_download_path(self, quality, format, folder):
|
|
base_directory = self.config.DOWNLOAD_DIR if (quality != 'audio' and format not in AUDIO_FORMATS) else self.config.AUDIO_DOWNLOAD_DIR
|
|
if folder:
|
|
if not self.config.CUSTOM_DIRS:
|
|
return None, {'status': 'error', 'msg': f'A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.'}
|
|
dldirectory = os.path.realpath(os.path.join(base_directory, folder))
|
|
real_base_directory = os.path.realpath(base_directory)
|
|
if not dldirectory.startswith(real_base_directory):
|
|
return None, {'status': 'error', 'msg': f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"'}
|
|
if not os.path.isdir(dldirectory):
|
|
if not self.config.CREATE_CUSTOM_DIRS:
|
|
return None, {'status': 'error', 'msg': f'Folder "{folder}" for download does not exist inside base directory "{real_base_directory}", and CREATE_CUSTOM_DIRS is not true in the configuration.'}
|
|
os.makedirs(dldirectory, exist_ok=True)
|
|
else:
|
|
dldirectory = base_directory
|
|
return dldirectory, None
|
|
|
|
async def __add_download(self, dl, auto_start):
    """Validate *dl*, build its yt-dlp settings and hand it to the precheck queue.

    Returns a result dict when the URL is already known (``status: ok``) or
    when the download directory cannot be resolved (the error dict from
    ``__calc_download_path``); returns ``None`` after queueing successfully.
    """
    url = dl.url

    # Reject URLs that are already being prechecked, queued, pending or done.
    already_known = (
        url in self.precheck_in_progress
        or self.queue.exists(url)
        or self.pending.exists(url)
        or self.done.exists(url)
    )
    if already_known:
        log.info(f"[PreCheck] URL already queued/processing/downloaded, skipping: {dl.url}")
        # Add event notification
        self._add_event('duplicate_skipped', 'URL already in queue or downloaded', dl.url)
        return {'status': 'ok', 'msg': 'Download already exists'}

    dldirectory, error_message = self.__calc_download_path(dl.quality, dl.format, dl.folder)
    if error_message is not None:
        return error_message

    # Output template, optionally prefixed with the custom name.
    if len(dl.custom_name_prefix) == 0:
        output = self.config.OUTPUT_TEMPLATE
    else:
        output = f'{dl.custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}'
    output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER
    entry = getattr(dl, 'entry', None)

    # Playlist items: switch to the playlist template (if configured) and
    # substitute every "playlist*" field of the entry into the template.
    if entry is not None and 'playlist' in entry and entry['playlist'] is not None:
        if len(self.config.OUTPUT_TEMPLATE_PLAYLIST):
            output = self.config.OUTPUT_TEMPLATE_PLAYLIST
        for field_name, field_value in entry.items():
            if field_name.startswith("playlist"):
                output = output.replace(f"%({field_name})s", str(field_value))

    ytdl_options = dict(self.config.YTDL_OPTIONS)
    playlist_item_limit = getattr(dl, 'playlist_item_limit', 0)
    if playlist_item_limit > 0:
        log.info(f'playlist limit is set. Processing only first {playlist_item_limit} entries')
        ytdl_options['playlistend'] = playlist_item_limit

    # Look for a per-domain cookie file at <STATE_DIR>/cookies/<domain>.txt
    domain = urlparse(dl.url).netloc
    log.info(f"[Cookie] Checking for cookie file for domain: {domain}")

    cookies_dir = os.path.join(self.config.STATE_DIR, 'cookies')
    log.debug(f"[Cookie] Cookies directory: {cookies_dir}")

    safe_domain = domain.replace(':', '_').replace('/', '_')
    cookie_file = os.path.join(cookies_dir, f'{safe_domain}.txt')

    log.debug(f"[Cookie] Looking for cookie file at: {cookie_file}")

    if not os.path.exists(cookie_file):
        log.info(f"[Cookie] No cookie file found for domain {domain}")
        log.debug(f"[Cookie] Checked path: {cookie_file}")
        # List available cookie files for debugging
        if not os.path.exists(cookies_dir):
            log.debug("[Cookie] Cookies directory does not exist")
        else:
            available_cookies = os.listdir(cookies_dir)
            if available_cookies:
                log.debug(f"[Cookie] Available cookie files: {available_cookies}")
            else:
                log.debug("[Cookie] Cookies directory is empty")
    else:
        log.info(f"[Cookie] Found cookie file: {cookie_file}")
        # The file is read only to log how many entries it holds; a read
        # failure is logged and the file is still handed to yt-dlp.
        try:
            with open(cookie_file, 'r') as handle:
                raw_lines = handle.readlines()
            cookie_lines = [line for line in raw_lines if line.strip() and not line.startswith('#')]
            log.info(f"[Cookie] Cookie file contains {len(cookie_lines)} cookie entries")
            if len(cookie_lines) == 0:
                log.warning("[Cookie] Cookie file exists but contains no cookies!")
            else:
                log.debug(f"[Cookie] First cookie entry: {cookie_lines[0][:50]}...")
        except Exception as e:
            log.error(f"[Cookie] Error reading cookie file: {e}", exc_info=True)

        ytdl_options['cookiefile'] = cookie_file
        log.info(f"[Cookie] Configured yt-dlp to use cookiefile: {cookie_file}")

    # Mark the URL as being processed (prevents duplicates); the stored
    # DownloadInfo is kept so it can be displayed while it waits.
    self.precheck_in_progress[dl.url] = dl

    # Queue for sequential precheck processing
    precheck_item = {
        'dl': dl,
        'auto_start': auto_start,
        'dldirectory': dldirectory,
        'output': output,
        'output_chapter': output_chapter,
        'ytdl_options': ytdl_options,
        'entry': entry,
    }
    await self.precheck_queue.put(precheck_item)
    log.debug(f"[PreCheck] Queued for processing: {dl.url}")

    # Notify right away, before the precheck has run.
    await self.notifier.added(dl)
|
|
|
|
async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already):
|
|
if not entry:
|
|
return {'status': 'error', 'msg': "Invalid/empty data was given."}
|
|
|
|
error = None
|
|
if "live_status" in entry and "release_timestamp" in entry and entry.get("live_status") == "is_upcoming":
|
|
dt_ts = datetime.fromtimestamp(entry.get("release_timestamp")).strftime('%Y-%m-%d %H:%M:%S %z')
|
|
error = f"Live stream is scheduled to start at {dt_ts}"
|
|
else:
|
|
if "msg" in entry:
|
|
error = entry["msg"]
|
|
|
|
etype = entry.get('_type') or 'video'
|
|
|
|
if etype.startswith('url'):
|
|
log.debug('Processing as an url')
|
|
return await self.add(entry['url'], quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already)
|
|
elif etype == 'playlist':
|
|
log.debug('Processing as a playlist')
|
|
entries = entry['entries']
|
|
log.info(f'playlist detected with {len(entries)} entries')
|
|
|
|
playlist_index_digits = len(str(len(entries)))
|
|
results = []
|
|
if playlist_item_limit > 0:
|
|
log.info(f'Playlist item limit is set. Processing only first {playlist_item_limit} entries')
|
|
entries = entries[:playlist_item_limit]
|
|
|
|
# Verify playlist entry has 'id' before using it
|
|
playlist_id = entry.get("id", "unknown_playlist")
|
|
if "id" not in entry:
|
|
log.warning(f"Playlist entry missing 'id' field. Using fallback 'unknown_playlist'. Entry keys: {list(entry.keys())}")
|
|
|
|
for index, etr in enumerate(entries, start=1):
|
|
etr["_type"] = "video"
|
|
etr["playlist"] = playlist_id
|
|
etr["playlist_index"] = '{{0:0{0:d}d}}'.format(playlist_index_digits).format(index)
|
|
for property in ("id", "title", "uploader", "uploader_id"):
|
|
if property in entry:
|
|
etr[f"playlist_{property}"] = entry[property]
|
|
results.append(await self.__add_entry(etr, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already))
|
|
if any(res['status'] == 'error' for res in results):
|
|
return {'status': 'error', 'msg': ', '.join(res['msg'] for res in results if res['status'] == 'error' and 'msg' in res)}
|
|
return {'status': 'ok'}
|
|
elif etype == 'video' or (etype.startswith('url') and 'id' in entry and 'title' in entry):
|
|
log.debug('Processing as a video')
|
|
|
|
# Extract ID from entry, or derive from URL if missing
|
|
video_id = entry.get('id')
|
|
if not video_id:
|
|
# Try to extract ID from URL (e.g., viewkey parameter or URL path)
|
|
video_url = entry.get('url', '')
|
|
if 'viewkey=' in video_url:
|
|
# Extract viewkey parameter (common in PornHub, etc.)
|
|
match = re.search(r'viewkey=([^&]+)', video_url)
|
|
if match:
|
|
video_id = match.group(1)
|
|
log.info(f"Extracted video ID from viewkey: {video_id}")
|
|
elif 'webpage_url' in entry:
|
|
# Use webpage_url as fallback
|
|
video_id = entry['webpage_url']
|
|
else:
|
|
# Last resort: use the URL itself
|
|
video_id = video_url
|
|
|
|
if not video_id:
|
|
log.error(f"Video entry missing 'id' field and could not extract from URL. Entry keys: {list(entry.keys())}")
|
|
return {'status': 'error', 'msg': "Video entry missing required 'id' field and URL extraction failed"}
|
|
|
|
key = entry.get('webpage_url') or entry['url']
|
|
if not self.queue.exists(key):
|
|
dl = DownloadInfo(video_id, entry.get('title') or video_id, key, quality, format, folder, custom_name_prefix, error, entry, playlist_item_limit)
|
|
await self.__add_download(dl, auto_start)
|
|
return {'status': 'ok'}
|
|
return {'status': 'error', 'msg': f'Unsupported resource "{etype}"'}
|
|
|
|
async def add(self, url, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start=True, already=None):
    """Extract metadata for ``url`` and hand the result to ``__add_entry``.

    ``already`` collects the URLs visited so far; a URL that is already in
    it is treated as recursion, skipped, and reported as success. A fresh
    set is created when the caller passes ``None``. All other arguments are
    forwarded unchanged to ``__add_entry``.

    Returns:
        ``{'status': 'ok'}`` for an already-visited URL,
        ``{'status': 'error', 'msg': ...}`` when extraction raises
        ``yt_dlp.utils.YoutubeDLError``, otherwise whatever
        ``__add_entry`` returns.
    """
    log.info(f'adding {url}: {quality=} {format=} {already=} {folder=} {custom_name_prefix=} {playlist_strict_mode=} {playlist_item_limit=} {auto_start=}')
    if already is None:
        already = set()

    # Guard clause: never process the same URL twice within one add chain.
    if url in already:
        log.info('recursion detected, skipping')
        return {'status': 'ok'}
    already.add(url)

    # Extraction is a blocking call, so it is pushed to the default executor.
    loop = asyncio.get_running_loop()
    try:
        entry = await loop.run_in_executor(None, self.__extract_info, url, playlist_strict_mode)
    except yt_dlp.utils.YoutubeDLError as exc:
        return {'status': 'error', 'msg': str(exc)}

    return await self.__add_entry(entry, quality, format, folder, custom_name_prefix, playlist_strict_mode, playlist_item_limit, auto_start, already)
|
|
|
|
async def start_pending(self, ids):
    """Move downloads from the pending store to the queue and start them.

    For every id present in ``self.pending`` the download is put into
    ``self.queue``, removed from ``self.pending``, and
    ``self.__start_download`` is scheduled as an asyncio task. Unknown ids
    are logged and skipped.

    Args:
        ids: iterable of download ids to start.

    Returns:
        Always ``{'status': 'ok'}``, even when some ids were unknown.
    """
    for download_id in ids:
        if not self.pending.exists(download_id):
            # Logger.warn is a deprecated alias; use Logger.warning.
            log.warning(f'requested start for non-existent download {download_id}')
            continue
        dl = self.pending.get(download_id)
        self.queue.put(dl)
        self.pending.delete(download_id)
        # Scheduled, not awaited, so the caller returns without waiting for
        # the download to run.
        # NOTE(review): the task object is not retained here; asyncio only
        # keeps weak references to tasks - confirm something else holds it.
        asyncio.create_task(self.__start_download(dl))
    return {'status': 'ok'}
|
|
|
|
async def cancel(self, ids):
    """Cancel downloads that are pending, queued, or already running.

    Per id:
      * in ``self.pending``: removed and ``notifier.canceled`` is awaited;
      * in ``self.queue`` and ``started()``: only ``cancel()`` is called on
        it - this method neither removes it from the queue nor notifies;
      * in ``self.queue`` but not started: removed and
        ``notifier.canceled`` is awaited;
      * unknown: logged and skipped.

    Args:
        ids: iterable of download ids to cancel.

    Returns:
        Always ``{'status': 'ok'}``.
    """
    for download_id in ids:
        if self.pending.exists(download_id):
            self.pending.delete(download_id)
            await self.notifier.canceled(download_id)
            continue
        if not self.queue.exists(download_id):
            # Logger.warn is a deprecated alias; use Logger.warning.
            log.warning(f'requested cancel for non-existent download {download_id}')
            continue
        # Look the download up once instead of once per use.
        dl = self.queue.get(download_id)
        if dl.started():
            dl.cancel()
        else:
            self.queue.delete(download_id)
            await self.notifier.canceled(download_id)
    return {'status': 'ok'}
|
|
|
|
async def clear(self, ids):
    """Remove finished downloads from ``self.done``, optionally deleting files.

    When ``self.config.DELETE_FILE_ON_TRASHCAN`` is truthy, the file named
    by ``dl.info.filename`` inside the directory returned by
    ``self.__calc_download_path`` is removed first. File deletion is
    best-effort: any exception is logged and the entry is still cleared.
    After removal ``notifier.cleared`` is awaited for the id. Unknown ids
    are logged and skipped.

    Args:
        ids: iterable of download ids to clear.

    Returns:
        Always ``{'status': 'ok'}``.
    """
    for download_id in ids:
        if not self.done.exists(download_id):
            # Logger.warn is a deprecated alias; use Logger.warning.
            log.warning(f'requested delete for non-existent download {download_id}')
            continue
        if self.config.DELETE_FILE_ON_TRASHCAN:
            dl = self.done.get(download_id)
            try:
                dldirectory, _ = self.__calc_download_path(dl.info.quality, dl.info.format, dl.info.folder)
                os.remove(os.path.join(dldirectory, dl.info.filename))
            except Exception as e:
                # Deliberately broad: a missing file or missing attribute
                # must not prevent the entry from being cleared.
                log.warning(f'deleting file for download {download_id} failed with error message {e!r}')
        self.done.delete(download_id)
        await self.notifier.cleared(download_id)
    return {'status': 'ok'}
|
|
|
|
def get(self):
    """Return the current downloads, refreshing derived fields first.

    Side effects on the stored info objects:
      * ``website`` is filled from the URL's network location for every
        entry in ``self.queue``, ``self.pending`` and ``self.done`` whose
        ``website`` attribute is missing or ``None``;
      * ``file_exists`` is recomputed for every entry in ``self.done``.

    Returns:
        A 2-tuple ``(active, finished)`` of ``(key, info)`` lists.
        ``active`` holds the ``self.precheck_in_progress`` values (keyed by
        their ``url``), then the queue, then the pending entries;
        ``finished`` holds the entries of ``self.done``.
    """
    def backfill_website(info):
        # Covers both a missing attribute and an explicit None.
        if getattr(info, 'website', None) is None:
            info.website = urlparse(info.url).netloc

    for _key, dl in self.queue.items():
        backfill_website(dl.info)

    for _key, dl in self.pending.items():
        backfill_website(dl.info)

    for _key, dl in self.done.items():
        info = dl.info
        backfill_website(info)

        # A finished download counts as present only if it has a filename,
        # a download directory resolves, and the file is on disk right now.
        file_exists = False
        filename = getattr(info, 'filename', None)
        if filename:
            dldirectory, _ = self.__calc_download_path(info.quality, info.format, info.folder)
            if dldirectory:
                file_exists = os.path.exists(os.path.join(dldirectory, filename))
        info.file_exists = file_exists

    # Precheck items come first; these are the stored objects themselves,
    # not wrappers exposing `.info`.
    # NOTE(review): the original comment says they carry a 'preparing'
    # status - not verifiable from this method.
    active = [(dl.url, dl) for dl in self.precheck_in_progress.values()]
    active += [(key, dl.info) for key, dl in self.queue.items()]
    active += [(key, dl.info) for key, dl in self.pending.items()]
    finished = [(key, dl.info) for key, dl in self.done.items()]
    return (active, finished)
|
|
|
|
def _add_event(self, event_type, message, url=None):
    """Record an event, cap the history, and push it to the notifier.

    The event is a dict with keys ``type``, ``message``, ``timestamp``
    (whole seconds since the epoch) and ``url``. After appending, only the
    newest ``self.max_events`` entries are kept; a cap of zero or less
    keeps none. ``self.notifier.event(event)`` is then scheduled as an
    asyncio task.

    Args:
        event_type: value stored under ``'type'``.
        message: value stored under ``'message'``.
        url: optional value stored under ``'url'``.

    Raises:
        RuntimeError: from ``asyncio.create_task`` when no event loop is
            running in the current thread.
    """
    event = {
        'type': event_type,
        'message': message,
        'timestamp': int(time.time()),
        'url': url,
    }
    self.events.append(event)

    # Trim by overflow count rather than `events[-max_events:]`: with a cap
    # of 0 that slice is `events[0:]` and would keep the whole list.
    overflow = len(self.events) - self.max_events
    if overflow > 0:
        self.events = self.events[overflow:]

    # Scheduled, not awaited, so this stays a plain synchronous method.
    asyncio.create_task(self.notifier.event(event))
|
|
|
|
def get_events(self):
    """Return the stored event list itself (no copy is made)."""
    stored_events = self.events
    return stored_events
|
|
|
|
def clear_events(self):
    """Forget all stored events by rebinding ``self.events`` to a new empty list."""
    self.events = list()
|