""" YouTube video downloader module using yt-dlp """ import os import random import subprocess import yt_dlp def cleanup_video_file(video_path): """ Delete a specific video file after processing Args: video_path (str): Path to the video file to delete Returns: bool: True if file was deleted successfully, False otherwise """ try: if os.path.exists(video_path): os.remove(video_path) print(f"Cleaned up video file: {video_path}") return True else: print(f"Video file not found for cleanup: {video_path}") return False except Exception as e: print(f"Error cleaning up video file {video_path}: {str(e)}") return False def cleanup_downloads_directory(output_dir="downloads", keep_annotated=True): """ Clean up downloaded videos from the downloads directory Args: output_dir (str): Directory containing downloaded videos keep_annotated (bool): Whether to keep annotated videos (default: True) Returns: dict: Cleanup results with files removed and space freed """ try: if not os.path.exists(output_dir): return {"files_removed": 0, "space_freed_mb": 0} files_removed = 0 space_freed = 0 for filename in os.listdir(output_dir): file_path = os.path.join(output_dir, filename) # Skip if not a file if not os.path.isfile(file_path): continue # Skip annotated videos if keep_annotated is True if keep_annotated and "_annotated" in filename: continue # Skip pro reference videos (they can be reused) if "pro_reference" in filename: continue # Get file size before deletion try: file_size = os.path.getsize(file_path) space_freed += file_size # Remove the file os.remove(file_path) files_removed += 1 print(f"Cleaned up: {filename}") except Exception as e: print(f"Error removing {filename}: {str(e)}") # Convert bytes to MB space_freed_mb = space_freed / (1024 * 1024) return { "files_removed": files_removed, "space_freed_mb": round(space_freed_mb, 2) } except Exception as e: print(f"Error during cleanup: {str(e)}") return {"error": str(e)} def get_user_agents(): """Get a list of common user agents to rotate between""" return [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:120.0) Gecko/20100101 Firefox/120.0', ] def try_extract_browser_cookies(): """ Try to extract cookies from browser automatically Returns path to extracted cookies file if successful, None otherwise """ try: # Try to extract cookies from Chrome first browsers = ['chrome', 'firefox', 'safari', 'edge'] for browser in browsers: try: cookies_path = os.path.expanduser(f"~/.config/yt-dlp/cookies_{browser}.txt") # Use yt-dlp to extract cookies cmd = ['yt-dlp', '--cookies-from-browser', browser, '--print-to-file', 'cookies', cookies_path, '--no-download', 'https://www.youtube.com/'] result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) if result.returncode == 0 and os.path.exists(cookies_path): print(f"Successfully extracted cookies from {browser}") return cookies_path except (subprocess.TimeoutExpired, FileNotFoundError, Exception): continue except Exception: pass return None def find_cookies_file(): """ Look for browser cookies file that can be used for YouTube authentication Returns the path to cookies file if found, None otherwise """ possible_paths = [ os.path.expanduser("~/.config/yt-dlp/cookies.txt"), os.path.expanduser("~/cookies.txt"), "cookies.txt", os.path.join(os.getcwd(), "cookies.txt"), ] # First check for existing cookies files for path in possible_paths: if os.path.exists(path): print(f"Found existing cookies file: {path}") return path # If no existing cookies found, try to extract from browser print("No existing cookies found, trying to extract from browser...") extracted_cookies = try_extract_browser_cookies() if extracted_cookies: return extracted_cookies return None def print_cookie_help(): """ Print helpful instructions for setting up cookies to bypass YouTube bot detection """ help_text = """ šŸ”§ YouTube Bot Detection Fix - Cookie Setup Instructions: Method 1 - Automatic (Recommended): The system will try to automatically extract cookies from your browser. Method 2 - Manual Cookie Export: 1. Install a browser extension like "Get cookies.txt LOCALLY" 2. Go to youtube.com and make sure you're logged in 3. Use the extension to export cookies as 'cookies.txt' 4. Save the file in one of these locations: • ~/cookies.txt (your home directory) • ~/.config/yt-dlp/cookies.txt • In the same folder as this script Method 3 - Command Line (Advanced): Run: yt-dlp --cookies-from-browser chrome --print-to-file cookies ~/cookies.txt --no-download https://youtube.com (Replace 'chrome' with your browser: firefox, safari, edge) Method 4 - Alternative Video Sources: • Try using a different YouTube video URL • Consider using videos that don't require authentication Note: YouTube's bot detection is sometimes temporary - you can also try again later. """ print(help_text) def get_fallback_configs(): """ Get multiple configuration strategies to try in sequence """ user_agents = get_user_agents() cookies_file = find_cookies_file() configs = [] # Strategy 1: Use cookies if available if cookies_file: configs.append({ 'name': 'with_cookies', 'opts': { 'cookiefile': cookies_file, 'http_headers': { 'User-Agent': random.choice(user_agents), }, 'extractor_args': { 'youtube': { 'player_client': ['android', 'web'], } }, } }) # Strategy 2: Android client (often works better) configs.append({ 'name': 'android_client', 'opts': { 'http_headers': { 'User-Agent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', }, 'extractor_args': { 'youtube': { 'player_client': ['android'], } }, } }) # Strategy 3: Web client with full headers configs.append({ 'name': 'web_client_full', 'opts': { 'http_headers': { 'User-Agent': random.choice(user_agents), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', 'Accept-Encoding': 'gzip,deflate', 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Sec-Fetch-User': '?1', }, 'extractor_args': { 'youtube': { 'player_client': ['web'], } }, } }) # Strategy 4: Basic configuration (fallback) configs.append({ 'name': 'basic', 'opts': { 'http_headers': { 'User-Agent': random.choice(user_agents), }, } }) return configs def download_youtube_video(url, output_dir="downloads"): """ Download a YouTube video from the provided URL using yt-dlp with fallback strategies Args: url (str): YouTube video URL output_dir (str): Directory to save the downloaded video Returns: str: Path to the downloaded video file Raises: ValueError: If the URL is invalid or video is unavailable """ # Create output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) # Set output template for the downloaded file output_template = os.path.join(output_dir, "%(title)s.%(ext)s") # Get fallback configurations to try fallback_configs = get_fallback_configs() last_error = None # Try each configuration strategy for config in fallback_configs: print(f"Trying download strategy: {config['name']}") # Base yt-dlp options ydl_opts = { 'format': 'best[ext=mp4]/best', # Prefer mp4 format 'outtmpl': output_template, 'noplaylist': True, 'quiet': False, 'no_warnings': False, 'ignoreerrors': False, 'sleep_interval': 1, 'max_sleep_interval': 5, } # Merge strategy-specific options ydl_opts.update(config['opts']) try: # Create yt-dlp object and download the video with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=True) # If we get here, download was successful print(f"Download successful with strategy: {config['name']}") # Get the downloaded file path if 'entries' in info: # Playlist (should not happen with noplaylist=True) raise ValueError("Playlists are not supported") # Get video title and extension title = info.get('title', 'video') ext = info.get('ext', 'mp4') # Construct the file path video_path = os.path.join(output_dir, f"{title}.{ext}") # Check if file exists if not os.path.exists(video_path): # Try with sanitized filename sanitized_title = ''.join(c for c in title if c.isalnum() or c in ' ._-') video_path = os.path.join(output_dir, f"{sanitized_title}.{ext}") if not os.path.exists(video_path): # If still not found, look for any mp4 file in the directory mp4_files = [ f for f in os.listdir(output_dir) if f.endswith('.mp4') ] if mp4_files: video_path = os.path.join(output_dir, mp4_files[0]) else: raise ValueError("Downloaded file not found") return video_path except yt_dlp.utils.DownloadError as e: last_error = str(e) print(f"Strategy '{config['name']}' failed: {last_error}") if "Sign in to confirm you're not a bot" in last_error: print("Bot detection encountered, trying next strategy...") continue elif config == fallback_configs[-1]: # Last strategy failed break else: continue except Exception as e: last_error = str(e) print(f"Strategy '{config['name']}' failed with error: {last_error}") continue # If all strategies failed, provide helpful error message error_msg = f"All download strategies failed. Last error: {last_error}" if "Sign in to confirm you're not a bot" in (last_error or ""): print_cookie_help() error_msg += "\n\nāš ļø YouTube bot detection encountered. See the instructions above to fix this issue." raise ValueError(error_msg) def download_youtube_video_simple(url, output_dir="downloads"): """ Simplified YouTube video downloader - tries the most reliable methods first Args: url (str): YouTube video URL output_dir (str): Directory to save the downloaded video Returns: str: Path to the downloaded video file Raises: ValueError: If the URL is invalid or video is unavailable """ print(f"šŸ“„ Starting download from: {url}") try: return download_youtube_video(url, output_dir) except ValueError as e: if "Sign in to confirm you're not a bot" in str(e): print("\nšŸ¤– YouTube bot detection encountered!") print("šŸ’” Quick fixes to try:") print(" • Wait a few minutes and try again") print(" • Try a different YouTube video") print(" • Use a different network/VPN") print("\nšŸ“‹ For persistent issues, run print_cookie_help() for detailed setup instructions") raise e def download_pro_reference(url="https://www.youtube.com/shorts/geR666LWSHg", output_dir="downloads"): """ Download a professional golfer reference video using improved download methods Args: url (str): YouTube video URL of professional golfer (default: provided reference) output_dir (str): Directory to save the downloaded video Returns: str: Path to the downloaded pro reference video file """ try: # Create a specific filename for the pro reference os.makedirs(output_dir, exist_ok=True) # Check if pro reference already exists to avoid re-downloading pro_file_path = os.path.join(output_dir, "pro_reference.mp4") if os.path.exists(pro_file_path): print("Pro reference video already exists, using cached version") return pro_file_path # Try to download using the improved download function first try: print("Downloading pro reference video...") video_path = download_youtube_video(url, output_dir) # Rename to pro_reference ext = os.path.splitext(video_path)[1] new_path = os.path.join(output_dir, f"pro_reference{ext}") os.rename(video_path, new_path) print(f"Pro reference downloaded and saved as: {new_path}") return new_path except Exception as download_error: print(f"Standard download failed: {download_error}") print("Trying direct download with fixed name...") # Fallback: try direct download with fixed filename output_template = os.path.join(output_dir, "pro_reference.%(ext)s") fallback_configs = get_fallback_configs() for config in fallback_configs: print(f"Trying pro reference download with strategy: {config['name']}") ydl_opts = { 'format': 'best[ext=mp4]/best', 'outtmpl': output_template, 'noplaylist': True, 'quiet': False, 'no_warnings': False, 'ignoreerrors': False, } ydl_opts.update(config['opts']) try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.extract_info(url, download=True) # Check if file exists with mp4 extension if os.path.exists(pro_file_path): print(f"Pro reference downloaded successfully with strategy: {config['name']}") return pro_file_path else: # Try other extensions for ext in ['webm', 'mkv']: alt_path = os.path.join(output_dir, f"pro_reference.{ext}") if os.path.exists(alt_path): print(f"Pro reference downloaded as {ext} format") return alt_path except Exception as e: print(f"Pro reference strategy '{config['name']}' failed: {str(e)}") continue raise ValueError("All pro reference download strategies failed") except Exception as e: raise ValueError(f"Error downloading pro reference: {str(e)}")