""" agent.py ───────────────────────────────────────────────────────────── Autonomous Short-Form Video Engine — AI Brain Uses Nemotron-3 Super via OpenRouter (free tier) to transform a niche topic into a production-ready structured JSON package. ───────────────────────────────────────────────────────────── """ import os import re import json import time import logging from openai import OpenAI from dotenv import load_dotenv load_dotenv() logger = logging.getLogger(__name__) # ── OpenRouter client setup ─────────────────────────────── OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") # Fallback models (VERIFIED ALIVE for free tier) MODELS = [ "meta-llama/llama-3.3-70b-instruct:free", "mistralai/mistral-small-3.1-24b-instruct:free", "google/gemma-3-27b-it:free", "minimax/minimax-m2.5:free", "nvidia/nemotron-3-super-120b-a12b:free", ] DEFAULT_MODEL = MODELS[0] client = OpenAI( base_url="https://openrouter.ai/api/v1", api_key=OPENROUTER_API_KEY, default_headers={ "HTTP-Referer": "https://automate-ai.local", "X-Title": "Automate AI Video Engine", }, ) # ── Language Support ────────────────────────────────────── LANGUAGE_MAP = { "English": "English (Standard)", "Hindi": "Hindi (written in Devanagari script)", "Hinglish": "Hinglish (a natural mix of Hindi and English, written in Latin/Roman script)", } # ── System Prompt ───────────────────────────────────────── SYSTEM_PROMPT = """You are a short-form video scriptwriter. Transform the topic into a production-ready JSON package in {language_desc}. Output ONLY valid JSON. No markdown, no filler. Schema: {{ "niche": "string", "hook": {{ "text": "string (≤15 words in {language_desc})", "duration_seconds": 3 }}, "scenes": [ {{ "scene_number": 1, "type": "hook | meat | cta", "script_text": "vocal script in {language_desc}", "on_screen_text": "short caption in {language_desc}", "duration_seconds": 5, "pexels_keywords": ["English keywords"], "visual_description": "English description" }} ], "voiceover_settings": {{ "mood": "energetic", "gender_preference": "male" }}, "seo": {{ "title": "Title in {language_desc}", "description": "...", "hashtags": [], "keywords": [] }}, "total_duration_seconds": 60 }} """ USER_TEMPLATE = "Create a short-form video JSON for: {niche} (Style: {style}, Lang: {language_desc})." SCRIPT_TO_JSON_PROMPT = """You are a script-to-video parser. Convert the provided raw text script into a scene-by-scene package. Target Language for text/captions: {language_desc} Provide: 1. script_text: Exact portion in {language_desc}. 2. on_screen_text: Small caption in {language_desc}. 3. pexels_keywords: Search terms (ALWAYS English). 4. visual_description: Visuals (ALWAYS English). JSON ONLY matching: {{ "niche": "Custom Script", "scenes": [ {{ "scene_number": 1, "script_text": "string", "on_screen_text": "string", "duration_seconds": 5, "pexels_keywords": ["kw1"], "visual_description": "English description" }} ], "voiceover_settings": {{"mood": "energetic", "gender_preference": "male"}}, "seo": {{"title": "Title in {language_desc}", "description": "...", "hashtags": [], "keywords": []}}, "total_duration_seconds": 60 }} SCRIPT: {script} """ def generate_video_package(niche: str, style: str = "engaging and educational", language: str = "English", model: str = None) -> dict: """ Call Nemotron-3 Super via OpenRouter to generate the full video JSON. """ if not OPENROUTER_API_KEY: raise EnvironmentError("OPENROUTER_API_KEY is not set.") # Model rotation logic: start with requested, then try the pool model_queue = MODELS[:] if model and model in model_queue: model_queue.remove(model) model_queue.insert(0, model) elif model: model_queue.insert(0, model) lang_desc = LANGUAGE_MAP.get(language, "English") sys_prompt = SYSTEM_PROMPT.format(language_desc=lang_desc) user_prompt = USER_TEMPLATE.format(niche=niche, style=style, language_desc=lang_desc) last_error = None for attempt in range(len(model_queue) * 2): # Try each model twice if needed active_model = model_queue[attempt % len(model_queue)] logger.info(f"[Agent] Attempt {attempt+1} — using {active_model}...") try: response = client.chat.completions.create( model=active_model, messages=[ {"role": "system", "content": sys_prompt}, {"role": "user", "content": user_prompt}, ], temperature=0.7, max_tokens=2500, ) content = response.choices[0].message.content if not content: raise ValueError("Model returned empty content") raw = content.strip() # Strip markdown code fences if model wraps in ```json ... ``` if raw.startswith("```"): raw = raw.split("```")[1] if raw.startswith("json"): raw = raw[4:] raw = raw.strip() data = _robust_json_parse(raw) data["language"] = language # Store for downstream use logger.info(f"[Agent] ✅ JSON generated ({language}) and repaired successfully.") _validate_schema(data) return data except Exception as e: last_error = str(e) logger.warning(f"[Agent] Model {active_model} failed: {e}") time.sleep(1.5) # Short wait before next model continue raise ValueError(f"CRITICAL: All AI models failed or rate-limited. Last error: {last_error}") def parse_script_into_video_package(script: str, language: str = "English", model: str = None) -> dict: """ Take a raw user script and use AI to parse it into scene-by-scene JSON. """ if not OPENROUTER_API_KEY: raise EnvironmentError("OPENROUTER_API_KEY is not set.") model_queue = [model] if model else MODELS[:] lang_desc = LANGUAGE_MAP.get(language, "English") prompt = SCRIPT_TO_JSON_PROMPT.format(script=script, language_desc=lang_desc) last_error = None for attempt in range(len(model_queue) * 2): active_model = model_queue[attempt % len(model_queue)] logger.info(f"[Agent] Attempt {attempt+1} — Parsing with {active_model}...") try: response = client.chat.completions.create( model=active_model, messages=[ {"role": "system", "content": "You are a specialized script parser. Output ONLY JSON."}, {"role": "user", "content": prompt}, ], temperature=0.3, max_tokens=2500, ) content = response.choices[0].message.content if not content: raise ValueError("Model returned empty content") raw = content.strip() if raw.startswith("```"): raw = raw.split("```")[1] if raw.startswith("json"): raw = raw[4:] raw = raw.strip() data = _robust_json_parse(raw) data["language"] = language logger.info(f"[Agent] ✅ Script parsed ({language}) and repaired.") _validate_schema(data) return data except json.JSONDecodeError as e: logger.warning(f"[Agent] Script parse failed on attempt {attempt}: {e}") last_error = e if attempt < 3: time.sleep(2 ** attempt) except Exception as e: logger.error(f"[Agent] Script parse API error: {e}") last_error = e if attempt < 3: time.sleep(2 ** attempt) raise ValueError(f"Failed to parse script into JSON. Last error: {last_error}") def _robust_json_parse(raw: str) -> dict: """ Extract JSON from text and attempt to repair if it's truncated. """ # 1. Extract the actual JSON block using regex (find first { and last }) # If the response has trailing text or headers, this strips them. match = re.search(r'(\{.*\})', raw, re.DOTALL) if match: raw = match.group(1) else: # If no closing brace, try finding the start and manually closing start_idx = raw.find('{') if start_idx != -1: raw = raw[start_idx:] else: raise ValueError("No JSON object found in response") try: return json.loads(raw) except json.JSONDecodeError: # 2. Attempt Auto-Repair for truncated JSON repaired = _repair_json(raw) try: return json.loads(repaired) except json.JSONDecodeError as e: logger.error(f"[Agent] JSON Repair failed: {e}\nRaw start: {raw[:100]}...\nRaw end: {raw[-100:]}") raise ValueError(f"Failed to parse or repair JSON: {e}") def _repair_json(raw: str) -> str: """ Extremely robust JSON repair for truncated strings, objects and arrays. """ # Remove trailing commas and whitespace that cause issues raw = raw.strip() # Fix unterminated strings # If the last character is not " or }, and there was an unclosed quote... if raw.count('"') % 2 != 0: raw += '"' # Balance brackets and braces # Count open vs closed stack = [] in_string = False escape = False for i, char in enumerate(raw): if char == '"' and not escape: in_string = not in_string if in_string: if char == '\\': escape = not escape else: escape = False continue if char == '{': stack.append('}') elif char == '[': stack.append(']') elif char == '}' or char == ']': if stack and stack[-1] == char: stack.pop() # Close everything in reverse order while stack: raw += stack.pop() return raw def _validate_schema(data: dict) -> None: """Basic schema validation — raises KeyError if required fields missing.""" required_top = ["niche", "scenes", "voiceover_settings", "seo", "total_duration_seconds"] for key in required_top: if key not in data: raise KeyError(f"Missing required key in agent output: '{key}'") if not isinstance(data["scenes"], list) or len(data["scenes"]) == 0: raise ValueError("'scenes' must be a non-empty list") scene_required = ["scene_number", "script_text", "on_screen_text", "duration_seconds", "pexels_keywords"] for i, scene in enumerate(data["scenes"]): for key in scene_required: if key not in scene: raise KeyError(f"Scene {i+1} missing required key: '{key}'") seo_required = ["title", "description", "hashtags", "keywords"] for key in seo_required: if key not in data["seo"]: raise KeyError(f"Missing SEO key: '{key}'") # ── CLI Quick Test ──────────────────────────────────────── if __name__ == "__main__": import sys niche = sys.argv[1] if len(sys.argv) > 1 else "AI Productivity Tools" print(f"\n🧠 Generating video package for: '{niche}'\n") result = generate_video_package(niche) print(json.dumps(result, indent=2)) print(f"\n✅ Total scenes: {len(result['scenes'])}") print(f"⏱ Total duration: {result['total_duration_seconds']}s")