Spaces:
Sleeping
Sleeping
Enhanced LLM-first architecture with smart attachment handling
Browse files
- Replaced complex if/else routing with LLM-driven SmartRouter
- Added LLM-first nodes for web search, data analysis, calculator
- Enhanced web search to fetch full page content instead of snippets
- Fixed attachment handling to only download when file_name exists
- Upgraded to GPT-4o for better reasoning
- Added comprehensive debug logging for attachment flow
- .env 2 +1 -0
- .gitattributes 2 +35 -0
- .gitignore 2 +16 -0
- README 2.md +16 -0
- agent/agent.py +29 -22
- agent/nodes.py +617 -250
- check_env 2.py +18 -0
- requirements 2.txt +21 -0
- requirements_backup.txt +25 -0
- tests/test_agent.py +36 -21
.env 2
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY="<REDACTED - secret removed; revoke and rotate this key, and load it from the environment instead of committing it>"
|
.gitattributes 2
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore 2
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual Environment
|
| 2 |
+
venv/
|
| 3 |
+
.venv/
|
| 4 |
+
env/
|
| 5 |
+
.env/
|
| 6 |
+
|
| 7 |
+
# Other common build artifacts and cache directories
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.pyc
|
| 10 |
+
.pytest_cache/
|
| 11 |
+
.mypy_cache/
|
| 12 |
+
# Important: VS Code project settings - consider if you want to share specific workspace settings
.vscode/
|
| 13 |
+
# macOS specific
.DS_Store
|
| 14 |
+
|
| 15 |
+
# Environment variables file
|
| 16 |
+
.env
|
README 2.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: My Cool App
|
| 3 |
+
emoji: 🌟
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: "4.44.1" # Always put version strings in quotes
|
| 8 |
+
app_file: agent/app.py # Corrected path and file extension
|
| 9 |
+
pinned: false
|
| 10 |
+
hf_oauth: true # This line is correct for enabling OAuth
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# My Cool App (Your App Title)
|
| 14 |
+
|
| 15 |
+
This is a description of your application.
|
| 16 |
+
You can add more details about what your agent does, how to use it, etc.
|
agent/agent.py
CHANGED
|
@@ -1,45 +1,52 @@
|
|
|
|
|
| 1 |
from langgraph.graph import StateGraph, END
|
| 2 |
-
from typing import TypedDict
|
| 3 |
|
| 4 |
-
# Import your state and nodes from the nodes.py file
|
| 5 |
from agent.nodes import (
|
| 6 |
-
AgentState,
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
| 9 |
ImageExtractionNode,
|
| 10 |
AudioExtractionNode,
|
| 11 |
-
DataExtractionNode,
|
| 12 |
VideoExtractionNode,
|
|
|
|
| 13 |
AnswerRefinementNode,
|
| 14 |
-
WebSearchNode,
|
| 15 |
)
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
# Define the LangGraph workflow
|
| 19 |
-
# Workflow Assembly
|
| 20 |
workflow = StateGraph(AgentState)
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
"ImageExtractionNode",
|
| 24 |
"AudioExtractionNode",
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"WebSearchNode",
|
| 28 |
]
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
workflow.add_node(node, globals()[node])
|
| 33 |
|
| 34 |
-
# Add
|
| 35 |
workflow.add_node("AnswerRefinementNode", AnswerRefinementNode)
|
| 36 |
|
| 37 |
-
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
| 40 |
workflow.add_edge(node, "AnswerRefinementNode")
|
| 41 |
|
| 42 |
-
# The refinement node then goes to END
|
| 43 |
workflow.add_edge("AnswerRefinementNode", END)
|
| 44 |
|
| 45 |
-
app = workflow.compile()
|
|
|
|
| 1 |
+
# Clean, simple agent.py - let the LLM choose
|
| 2 |
from langgraph.graph import StateGraph, END
|
| 3 |
+
from typing import TypedDict
|
| 4 |
|
|
|
|
| 5 |
from agent.nodes import (
|
| 6 |
+
AgentState,
|
| 7 |
+
SmartRouter, # Our new simple LLM-driven router
|
| 8 |
+
# Keep your existing working nodes
|
| 9 |
+
CalculatorNode,
|
| 10 |
+
WebSearchNode,
|
| 11 |
+
DataExtractionNode,
|
| 12 |
ImageExtractionNode,
|
| 13 |
AudioExtractionNode,
|
|
|
|
| 14 |
VideoExtractionNode,
|
| 15 |
+
MultiStepNode,
|
| 16 |
AnswerRefinementNode,
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
+
# Simple workflow - let the LLM decide everything
|
|
|
|
|
|
|
| 20 |
workflow = StateGraph(AgentState)
|
| 21 |
+
|
| 22 |
+
# Available execution nodes
|
| 23 |
+
execution_nodes = [
|
| 24 |
+
"CalculatorNode",
|
| 25 |
+
"WebSearchNode",
|
| 26 |
+
"DataExtractionNode",
|
| 27 |
"ImageExtractionNode",
|
| 28 |
"AudioExtractionNode",
|
| 29 |
+
"VideoExtractionNode",
|
| 30 |
+
"MultiStepNode",
|
|
|
|
| 31 |
]
|
| 32 |
|
| 33 |
+
# Add the smart router
|
| 34 |
+
workflow.add_node("SmartRouter", SmartRouter)
|
| 35 |
+
|
| 36 |
+
# Add all execution nodes
|
| 37 |
+
for node in execution_nodes:
|
| 38 |
workflow.add_node(node, globals()[node])
|
| 39 |
|
| 40 |
+
# Add refinement
|
| 41 |
workflow.add_node("AnswerRefinementNode", AnswerRefinementNode)
|
| 42 |
|
| 43 |
+
# Simple flow: Router -> Execution -> Refinement -> Done
|
| 44 |
+
workflow.set_conditional_entry_point(SmartRouter, {node: node for node in execution_nodes})
|
| 45 |
|
| 46 |
+
# All execution nodes go to refinement
|
| 47 |
+
for node in execution_nodes:
|
| 48 |
workflow.add_edge(node, "AnswerRefinementNode")
|
| 49 |
|
|
|
|
| 50 |
workflow.add_edge("AnswerRefinementNode", END)
|
| 51 |
|
| 52 |
+
app = workflow.compile()
|
agent/nodes.py
CHANGED
|
@@ -1,24 +1,23 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
-
from typing import TypedDict
|
| 5 |
-
from openai import OpenAI
|
| 6 |
from io import BytesIO, StringIO
|
| 7 |
-
import wikipedia
|
| 8 |
import chardet
|
| 9 |
import whisper
|
| 10 |
|
| 11 |
-
# Import utilities and configuration needed by the nodes
|
| 12 |
from agent.utils import download_file, get_youtube_transcript, extract_final_answer, get_file_type
|
| 13 |
-
from agent.config import SYSTEM_PROMPT, ATTACHMENTS
|
| 14 |
from duckduckgo_search import DDGS
|
| 15 |
|
| 16 |
-
# Initialize OpenAI client
|
| 17 |
-
# This ensures each node has access to the client.
|
| 18 |
-
# It's good practice to get the API key from an environment variable.
|
| 19 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 20 |
|
| 21 |
-
# State Definition
|
| 22 |
class AgentState(TypedDict):
|
| 23 |
question: str
|
| 24 |
answer: str
|
|
@@ -26,192 +25,606 @@ class AgentState(TypedDict):
|
|
| 26 |
media_type: str
|
| 27 |
attachment_id: str
|
| 28 |
task_id: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def WebSearchNode(state: AgentState) -> AgentState:
|
|
|
|
| 32 |
try:
|
| 33 |
question = state["question"]
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
break
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
if not
|
| 44 |
state["answer"] = "Could not find relevant search results."
|
| 45 |
return state
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
response = client.chat.completions.create(
|
| 50 |
model="gpt-4-turbo",
|
| 51 |
messages=[
|
| 52 |
-
{"role": "system", "content":
|
| 53 |
-
{"role": "user", "content":
|
| 54 |
],
|
| 55 |
-
max_tokens=
|
| 56 |
temperature=0.1,
|
| 57 |
)
|
|
|
|
| 58 |
raw_answer = response.choices[0].message.content
|
| 59 |
state["answer"] = extract_final_answer(raw_answer)
|
| 60 |
-
state["extracted_data"] =
|
|
|
|
| 61 |
except Exception as e:
|
| 62 |
state["answer"] = f"Web search error: {str(e)}"
|
|
|
|
| 63 |
return state
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
def
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
return "ImageExtractionNode"
|
| 73 |
-
if re.search(r"\.(mp4|mov|avi|youtube)", question):
|
| 74 |
-
return "VideoExtractionNode"
|
| 75 |
-
if re.search(r"\.(mp3|wav|m4a)", question):
|
| 76 |
-
return "AudioExtractionNode"
|
| 77 |
-
if re.search(r"\.(csv|xls|xlsx|json|txt|py)", question): # Added txt, py for data
|
| 78 |
-
return "DataExtractionNode"
|
| 79 |
-
# If it's a general URL but not a specific media type, it might be a webpage for text
|
| 80 |
-
return "WebSearchNode" # <--- New node for general web search
|
| 81 |
-
|
| 82 |
-
# 2. Check for attachments
|
| 83 |
-
attachment_id = state.get("attachment_id")
|
| 84 |
-
if attachment_id and attachment_id in ATTACHMENTS:
|
| 85 |
-
attachment_type = ATTACHMENTS[attachment_id]["type"]
|
| 86 |
-
type_map = {
|
| 87 |
-
"audio": "AudioExtractionNode",
|
| 88 |
-
"data": "DataExtractionNode",
|
| 89 |
-
"image": "ImageExtractionNode",
|
| 90 |
-
"video": "VideoExtractionNode",
|
| 91 |
-
}
|
| 92 |
-
return type_map.get(attachment_type, "TextExtractionNode") # Fallback for unknown attachment types
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
|
| 108 |
-
#
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
try:
|
| 113 |
question = state["question"]
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
-
|
| 118 |
-
refinement_prompt = f"""
|
| 119 |
-
Original Question: {question}
|
| 120 |
-
Initial Answer: {initial_answer}
|
| 121 |
-
Extracted Context/Data: {extracted_data if extracted_data else "No specific data was extracted, the answer was generated based on general knowledge or initial processing."}
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
|
| 133 |
-
|
| 134 |
-
1. For numbers:
|
| 135 |
-
- Don't use commas (e.g., 1000000 not 1,000,000)
|
| 136 |
-
- Don't include units ($, %, etc.) unless specified
|
| 137 |
-
2. For strings:
|
| 138 |
-
- Don't use articles (a, an, the)
|
| 139 |
-
- Don't use abbreviations for cities/names
|
| 140 |
-
- Write digits in plain text (e.g., "two" instead of "2")
|
| 141 |
-
3. For comma-separated lists:
|
| 142 |
-
- Apply the above rules to each element
|
| 143 |
-
- Separate elements with commas only (no spaces unless part of the element)
|
| 144 |
|
| 145 |
-
|
| 146 |
-
"""
|
| 147 |
|
| 148 |
response = client.chat.completions.create(
|
| 149 |
-
model="gpt-4-turbo",
|
| 150 |
messages=[
|
| 151 |
-
{"role": "system", "content":
|
| 152 |
-
{"role": "user", "content":
|
| 153 |
],
|
| 154 |
-
max_tokens=
|
| 155 |
-
temperature=0.1,
|
| 156 |
)
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
| 160 |
except Exception as e:
|
| 161 |
-
state["answer"] = f"
|
| 162 |
-
|
| 163 |
return state
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
|
|
|
| 167 |
def ImageExtractionNode(state: AgentState) -> AgentState:
|
|
|
|
| 168 |
try:
|
| 169 |
content = None
|
| 170 |
|
| 171 |
if state.get("attachment_id") and state["attachment_id"] in ATTACHMENTS:
|
| 172 |
content = ATTACHMENTS[state["attachment_id"]]["content"]
|
| 173 |
elif "http" in state["question"]:
|
| 174 |
-
url_match = re.search(
|
| 175 |
-
r"https?://\S+\.(jpg|jpeg|png|gif)", state["question"], re.I
|
| 176 |
-
)
|
| 177 |
if url_match:
|
| 178 |
content = download_file(url_match.group(0))
|
| 179 |
|
| 180 |
if not content:
|
| 181 |
-
|
|
|
|
| 182 |
|
| 183 |
base64_image = base64.b64encode(content).decode()
|
| 184 |
|
| 185 |
-
prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
response = client.chat.completions.create(
|
| 188 |
model="gpt-4-turbo",
|
| 189 |
messages=[
|
| 190 |
-
{"role": "system", "content":
|
| 191 |
{
|
| 192 |
"role": "user",
|
| 193 |
"content": [
|
| 194 |
-
{"type": "text", "text":
|
| 195 |
{
|
| 196 |
"type": "image_url",
|
| 197 |
-
"image_url": {
|
| 198 |
-
"url": f"data:image/jpeg;base64,{base64_image}"
|
| 199 |
-
},
|
| 200 |
},
|
| 201 |
],
|
| 202 |
},
|
| 203 |
],
|
| 204 |
-
max_tokens=
|
| 205 |
temperature=0.1,
|
| 206 |
)
|
| 207 |
raw_answer = response.choices[0].message.content
|
| 208 |
state["answer"] = extract_final_answer(raw_answer)
|
|
|
|
| 209 |
except Exception as e:
|
| 210 |
-
state["answer"] = f"Image error: {str(e)}"
|
|
|
|
| 211 |
return state
|
| 212 |
|
| 213 |
-
|
| 214 |
def AudioExtractionNode(state: AgentState) -> AgentState:
|
|
|
|
| 215 |
try:
|
| 216 |
content = None
|
| 217 |
|
|
@@ -219,198 +632,152 @@ def AudioExtractionNode(state: AgentState) -> AgentState:
|
|
| 219 |
content = ATTACHMENTS[state["attachment_id"]]["content"]
|
| 220 |
|
| 221 |
if not content:
|
| 222 |
-
|
|
|
|
| 223 |
|
| 224 |
with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp:
|
| 225 |
tmp.write(content)
|
| 226 |
tmp.flush()
|
|
|
|
|
|
|
| 227 |
model = whisper.load_model("base")
|
| 228 |
result = model.transcribe(tmp.name)
|
| 229 |
transcription = result["text"]
|
| 230 |
|
| 231 |
-
#
|
| 232 |
-
|
| 233 |
|
| 234 |
-
|
| 235 |
-
model="gpt-4-turbo",
|
| 236 |
-
messages=[
|
| 237 |
-
{"role": "system", "content": SYSTEM_PROMPT},
|
| 238 |
-
{"role": "user", "content": prompt},
|
| 239 |
-
],
|
| 240 |
-
max_tokens=300,
|
| 241 |
-
temperature=0.1,
|
| 242 |
-
)
|
| 243 |
-
raw_answer = response.choices[0].message.content
|
| 244 |
-
state["answer"] = extract_final_answer(raw_answer)
|
| 245 |
-
except Exception as e:
|
| 246 |
-
state["answer"] = f"Audio error: {str(e)}"
|
| 247 |
-
return state
|
| 248 |
|
|
|
|
|
|
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
| 254 |
|
| 255 |
-
|
| 256 |
-
attachment = ATTACHMENTS[state["attachment_id"]]
|
| 257 |
-
content = attachment["content"]
|
| 258 |
-
file_ext = os.path.splitext(attachment["name"])[1][1:].lower()
|
| 259 |
-
elif "http" in state["question"]:
|
| 260 |
-
url_match = re.search(
|
| 261 |
-
r"https?://\S+\.(csv|xlsx?|json)", state["question"], re.I
|
| 262 |
-
)
|
| 263 |
-
if url_match:
|
| 264 |
-
content = download_file(url_match.group(0))
|
| 265 |
-
file_ext = url_match.group(1).lower()
|
| 266 |
-
|
| 267 |
-
if not content:
|
| 268 |
-
return TextExtractionNode(state)
|
| 269 |
-
|
| 270 |
-
# Handle Python files by analyzing code
|
| 271 |
-
if file_ext == "py":
|
| 272 |
-
code_content = content.decode("utf-8", errors="replace")
|
| 273 |
-
prompt = f"Question: {state['question']}\n\nPython code:\n```\n{code_content}\n```"
|
| 274 |
|
| 275 |
response = client.chat.completions.create(
|
| 276 |
model="gpt-4-turbo",
|
| 277 |
messages=[
|
| 278 |
-
{"role": "system", "content":
|
| 279 |
-
{"role": "user", "content":
|
| 280 |
],
|
| 281 |
-
max_tokens=
|
| 282 |
temperature=0.1,
|
| 283 |
)
|
| 284 |
raw_answer = response.choices[0].message.content
|
| 285 |
state["answer"] = extract_final_answer(raw_answer)
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
# Handle other data files
|
| 289 |
-
if file_ext == "csv":
|
| 290 |
-
detected = chardet.detect(content)
|
| 291 |
-
encoding = detected["encoding"] or "utf-8"
|
| 292 |
-
decoded_content = content.decode(encoding, errors="replace")
|
| 293 |
-
df = pd.read_csv(StringIO(decoded_content))
|
| 294 |
-
elif file_ext in ("xls", "xlsx"):
|
| 295 |
-
df = pd.read_excel(BytesIO(content))
|
| 296 |
-
elif file_ext == "json":
|
| 297 |
-
decoded_content = content.decode("utf-8", errors="replace")
|
| 298 |
-
df = pd.read_json(StringIO(decoded_content))
|
| 299 |
-
else:
|
| 300 |
-
state["answer"] = f"Unsupported format: {file_ext}"
|
| 301 |
-
return state
|
| 302 |
-
|
| 303 |
-
summary = f"Data shape: {df.shape}\nColumns: {list(df.columns)}\nSample:\n{df.head(3).to_markdown()}"
|
| 304 |
-
|
| 305 |
-
prompt = f"Question: {state['question']}\n\nData summary:\n{summary}"
|
| 306 |
-
|
| 307 |
-
response = client.chat.completions.create(
|
| 308 |
-
model="gpt-4-turbo",
|
| 309 |
-
messages=[
|
| 310 |
-
{"role": "system", "content": SYSTEM_PROMPT},
|
| 311 |
-
{"role": "user", "content": prompt},
|
| 312 |
-
],
|
| 313 |
-
max_tokens=300,
|
| 314 |
-
temperature=0.1,
|
| 315 |
-
)
|
| 316 |
-
raw_answer = response.choices[0].message.content
|
| 317 |
-
state["answer"] = extract_final_answer(raw_answer)
|
| 318 |
except Exception as e:
|
| 319 |
-
state["answer"] = f"
|
|
|
|
| 320 |
return state
|
| 321 |
|
| 322 |
-
|
| 323 |
def VideoExtractionNode(state: AgentState) -> AgentState:
|
|
|
|
| 324 |
try:
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
r"https?://www\.youtube\.com/watch\?v=[a-zA-Z0-9_-]+", state["question"]
|
| 328 |
-
)
|
| 329 |
if youtube_match:
|
| 330 |
video_url = youtube_match.group(0)
|
| 331 |
transcript = get_youtube_transcript(video_url)
|
| 332 |
|
| 333 |
if not transcript:
|
| 334 |
-
|
|
|
|
| 335 |
return state
|
| 336 |
|
| 337 |
-
prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
|
| 339 |
response = client.chat.completions.create(
|
| 340 |
model="gpt-4-turbo",
|
| 341 |
messages=[
|
| 342 |
-
{"role": "system", "content":
|
| 343 |
-
{"role": "user", "content":
|
| 344 |
],
|
| 345 |
-
max_tokens=
|
| 346 |
temperature=0.1,
|
| 347 |
)
|
| 348 |
raw_answer = response.choices[0].message.content
|
| 349 |
state["answer"] = extract_final_answer(raw_answer)
|
| 350 |
else:
|
| 351 |
-
state["answer"] = "YouTube URL
|
|
|
|
| 352 |
except Exception as e:
|
| 353 |
-
state["answer"] = f"Video error: {str(e)}"
|
|
|
|
| 354 |
return state
|
| 355 |
-
def TextExtractionNode(state: AgentState) -> AgentState:
|
| 356 |
-
try:
|
| 357 |
-
# Special handling for reverse text question
|
| 358 |
-
if state["question"].startswith(".rewsna"):
|
| 359 |
-
state["answer"] = "right"
|
| 360 |
-
return state
|
| 361 |
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
model="gpt-4-turbo",
|
| 377 |
-
messages=[
|
| 378 |
-
{"role": "user", "content": query_gen_prompt},
|
| 379 |
-
],
|
| 380 |
-
max_tokens=50,
|
| 381 |
-
temperature=0.0,
|
| 382 |
-
)
|
| 383 |
-
search_term = search_query_response.choices[0].message.content.strip()
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
except wikipedia.exceptions.PageError:
|
| 390 |
-
print(f" Wikipedia page not found for '{search_term}'")
|
| 391 |
-
except wikipedia.exceptions.DisambiguationError as e:
|
| 392 |
-
if e.options:
|
| 393 |
-
context = wikipedia.summary(e.options[0], sentences=3)
|
| 394 |
-
print(f" Wikipedia disambiguation for '{search_term}': {e.options}")
|
| 395 |
-
except Exception as e:
|
| 396 |
-
print(f" Error fetching Wikipedia summary for '{search_term}': {e}")
|
| 397 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
|
| 399 |
-
|
|
|
|
| 400 |
|
| 401 |
response = client.chat.completions.create(
|
| 402 |
model="gpt-4-turbo",
|
| 403 |
messages=[
|
| 404 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 405 |
-
{"role": "user", "content":
|
| 406 |
],
|
| 407 |
max_tokens=300,
|
| 408 |
temperature=0.1,
|
| 409 |
)
|
| 410 |
-
|
| 411 |
-
state["answer"] = extract_final_answer(
|
| 412 |
-
|
| 413 |
except Exception as e:
|
| 414 |
-
state["answer"] = f"
|
| 415 |
-
print(f"
|
| 416 |
return state
|
|
|
|
| 1 |
+
# LLM-First Intelligent Nodes - Let AI do the thinking!
|
| 2 |
+
|
| 3 |
+
import os, re, base64, tempfile, json, math
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
+
from typing import TypedDict, List, Dict, Any
|
| 7 |
+
from openai import OpenAI
|
| 8 |
from io import BytesIO, StringIO
|
| 9 |
+
import wikipedia
|
| 10 |
import chardet
|
| 11 |
import whisper
|
| 12 |
|
|
|
|
| 13 |
from agent.utils import download_file, get_youtube_transcript, extract_final_answer, get_file_type
|
| 14 |
+
from agent.config import SYSTEM_PROMPT, ATTACHMENTS
|
| 15 |
from duckduckgo_search import DDGS
|
| 16 |
|
| 17 |
+
# Initialize OpenAI client
|
|
|
|
|
|
|
| 18 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 19 |
|
| 20 |
+
# Enhanced State Definition
|
| 21 |
class AgentState(TypedDict):
|
| 22 |
question: str
|
| 23 |
answer: str
|
|
|
|
| 25 |
media_type: str
|
| 26 |
attachment_id: str
|
| 27 |
task_id: str
|
| 28 |
+
question_analysis: dict
|
| 29 |
+
tools_used: list
|
| 30 |
+
|
| 31 |
+
# SMART ROUTER (keep the one we have - it's working!)
|
| 32 |
+
def SmartRouter(state: AgentState) -> str:
|
| 33 |
+
"""Let the LLM decide what approach to take"""
|
| 34 |
+
|
| 35 |
+
question = state["question"]
|
| 36 |
+
attachment_info = ""
|
| 37 |
+
|
| 38 |
+
if state.get("attachment_id") and state["attachment_id"] in ATTACHMENTS:
|
| 39 |
+
attachment = ATTACHMENTS[state["attachment_id"]]
|
| 40 |
+
attachment_info = f"Available attachment: {attachment['name']} (type: {attachment['type']})"
|
| 41 |
+
else:
|
| 42 |
+
attachment_info = "No attachment available"
|
| 43 |
+
|
| 44 |
+
routing_prompt = f"""You are a task router. Analyze this question and choose the best approach.
|
| 45 |
+
|
| 46 |
+
Question: {question}
|
| 47 |
+
{attachment_info}
|
| 48 |
+
|
| 49 |
+
Available approaches:
|
| 50 |
+
1. web_search - for factual questions, research, current events
|
| 51 |
+
2. calculator - for mathematical calculations, number problems
|
| 52 |
+
3. data_analysis - for questions about CSV/Excel files or data processing
|
| 53 |
+
4. image_analysis - for questions about images or visual content
|
| 54 |
+
5. audio_analysis - for questions about audio files or transcripts
|
| 55 |
+
6. video_analysis - for questions about videos or YouTube content
|
| 56 |
+
7. multi_step - for complex questions needing multiple approaches
|
| 57 |
+
|
| 58 |
+
Choose exactly ONE approach that would be most effective for answering this question.
|
| 59 |
+
Respond with just the approach name (e.g., "web_search" or "calculator").
|
| 60 |
+
"""
|
| 61 |
|
| 62 |
+
try:
|
| 63 |
+
response = client.chat.completions.create(
|
| 64 |
+
model="gpt-4-turbo",
|
| 65 |
+
messages=[{"role": "user", "content": routing_prompt}],
|
| 66 |
+
max_tokens=50,
|
| 67 |
+
temperature=0.1,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
choice = response.choices[0].message.content.strip().lower()
|
| 71 |
+
print(f"DEBUG: LLM chose '{choice}' for question: {question[:50]}...")
|
| 72 |
+
|
| 73 |
+
route_map = {
|
| 74 |
+
"web_search": "WebSearchNode",
|
| 75 |
+
"calculator": "CalculatorNode",
|
| 76 |
+
"data_analysis": "DataExtractionNode",
|
| 77 |
+
"image_analysis": "ImageExtractionNode",
|
| 78 |
+
"audio_analysis": "AudioExtractionNode",
|
| 79 |
+
"video_analysis": "VideoExtractionNode",
|
| 80 |
+
"multi_step": "MultiStepNode"
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
return route_map.get(choice, "WebSearchNode")
|
| 84 |
+
|
| 85 |
+
except Exception as e:
|
| 86 |
+
print(f"Router error: {e}")
|
| 87 |
+
if state.get("attachment_id") and state["attachment_id"] in ATTACHMENTS:
|
| 88 |
+
attachment_type = ATTACHMENTS[state["attachment_id"]]["type"]
|
| 89 |
+
fallback_map = {
|
| 90 |
+
"audio": "AudioExtractionNode",
|
| 91 |
+
"data": "DataExtractionNode",
|
| 92 |
+
"image": "ImageExtractionNode",
|
| 93 |
+
"video": "VideoExtractionNode",
|
| 94 |
+
}
|
| 95 |
+
return fallback_map.get(attachment_type, "WebSearchNode")
|
| 96 |
+
|
| 97 |
+
return "WebSearchNode"
|
| 98 |
+
|
| 99 |
+
# LLM-FIRST WEB SEARCH: Let AI plan and execute searches
|
| 100 |
def WebSearchNode(state: AgentState) -> AgentState:
|
| 101 |
+
"""Intelligent web search - let LLM plan the search strategy"""
|
| 102 |
try:
|
| 103 |
question = state["question"]
|
| 104 |
+
|
| 105 |
+
# Step 1: Let LLM plan the search strategy
|
| 106 |
+
search_planning_prompt = f"""You are a research expert. Plan how to search for this question:
|
| 107 |
+
|
| 108 |
+
Question: {question}
|
| 109 |
+
|
| 110 |
+
Create a search strategy:
|
| 111 |
+
1. Generate 2-3 different search queries that might find the answer
|
| 112 |
+
2. Consider what type of sources would be most reliable
|
| 113 |
+
3. Think about what specific information you're looking for
|
| 114 |
|
| 115 |
+
Respond in JSON format:
|
| 116 |
+
{{
|
| 117 |
+
"queries": ["query1", "query2", "query3"],
|
| 118 |
+
"target_info": "what specific information to look for",
|
| 119 |
+
"source_preference": "type of sources that would be most reliable"
|
| 120 |
+
}}"""
|
| 121 |
+
|
| 122 |
+
planning_response = client.chat.completions.create(
|
| 123 |
+
model="gpt-4-turbo",
|
| 124 |
+
messages=[{"role": "user", "content": search_planning_prompt}],
|
| 125 |
+
max_tokens=200,
|
| 126 |
+
temperature=0.2,
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
try:
|
| 130 |
+
search_plan = json.loads(planning_response.choices[0].message.content)
|
| 131 |
+
queries = search_plan.get("queries", [question])
|
| 132 |
+
target_info = search_plan.get("target_info", "")
|
| 133 |
+
except:
|
| 134 |
+
queries = [question]
|
| 135 |
+
target_info = ""
|
| 136 |
+
|
| 137 |
+
# Step 2: Execute searches
|
| 138 |
+
all_results = ""
|
| 139 |
+
for query in queries[:3]: # Limit to 3 queries
|
| 140 |
+
try:
|
| 141 |
+
with DDGS() as ddgs:
|
| 142 |
+
for r in ddgs.text(query, region='wt-wt', safesearch='off', timelimit='year'):
|
| 143 |
+
all_results += f"Query: {query}\nTitle: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}\n\n"
|
| 144 |
+
if len(all_results) > 4000:
|
| 145 |
+
break
|
| 146 |
+
if len(all_results) > 4000:
|
| 147 |
break
|
| 148 |
+
except Exception as e:
|
| 149 |
+
print(f"Search error for query '{query}': {e}")
|
| 150 |
|
| 151 |
+
if not all_results:
|
| 152 |
state["answer"] = "Could not find relevant search results."
|
| 153 |
return state
|
| 154 |
|
| 155 |
+
# Step 3: Let LLM analyze and synthesize results
|
| 156 |
+
analysis_prompt = f"""You are a research analyst. Analyze these search results to answer the question.
|
| 157 |
+
|
| 158 |
+
Original Question: {question}
|
| 159 |
+
Target Information: {target_info}
|
| 160 |
+
|
| 161 |
+
Search Results:
|
| 162 |
+
{all_results}
|
| 163 |
+
|
| 164 |
+
Instructions:
|
| 165 |
+
1. Carefully read through all the search results
|
| 166 |
+
2. Extract the specific information that answers the question
|
| 167 |
+
3. If you find conflicting information, note it
|
| 168 |
+
4. If the answer requires combining information from multiple sources, do so
|
| 169 |
+
5. Be precise and specific in your answer
|
| 170 |
+
|
| 171 |
+
{SYSTEM_PROMPT.strip()}"""
|
| 172 |
|
| 173 |
response = client.chat.completions.create(
|
| 174 |
model="gpt-4-turbo",
|
| 175 |
messages=[
|
| 176 |
+
{"role": "system", "content": "You are a research analyst who provides precise, well-researched answers."},
|
| 177 |
+
{"role": "user", "content": analysis_prompt},
|
| 178 |
],
|
| 179 |
+
max_tokens=400,
|
| 180 |
temperature=0.1,
|
| 181 |
)
|
| 182 |
+
|
| 183 |
raw_answer = response.choices[0].message.content
|
| 184 |
state["answer"] = extract_final_answer(raw_answer)
|
| 185 |
+
state["extracted_data"] = all_results
|
| 186 |
+
|
| 187 |
except Exception as e:
|
| 188 |
state["answer"] = f"Web search error: {str(e)}"
|
| 189 |
+
|
| 190 |
return state
|
| 191 |
|
| 192 |
+
# LLM-FIRST DATA ANALYSIS: Let AI understand and analyze data
|
| 193 |
+
def _load_dataframe(content, file_ext):
    """Load raw file bytes into a DataFrame; return None for unsupported extensions."""
    if file_ext == "csv":
        # CSV attachments come in assorted encodings, so sniff before decoding.
        detected = chardet.detect(content)
        encoding = detected["encoding"] or "utf-8"
        return pd.read_csv(StringIO(content.decode(encoding, errors="replace")))
    if file_ext in ("xls", "xlsx"):
        return pd.read_excel(BytesIO(content))
    if file_ext == "json":
        return pd.read_json(StringIO(content.decode("utf-8", errors="replace")))
    return None


def _parse_analysis_plan(raw_plan):
    """Parse the planner's JSON reply into a dict, or return a neutral plan."""
    fallback_plan = {"analysis_type": "general", "relevant_columns": [], "steps": []}
    if not isinstance(raw_plan, str):
        return fallback_plan
    # Models frequently wrap JSON in a markdown fence; strip it before parsing.
    cleaned = re.sub(r"^```[a-zA-Z]*\s*|\s*```$", "", raw_plan.strip())
    try:
        plan = json.loads(cleaned)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return fallback_plan
    return plan if isinstance(plan, dict) else fallback_plan


def _run_planned_aggregation(df, question, analysis_plan):
    """Try the sum/count/average shortcuts; return None when none yields a value."""
    question_lower = question.lower()
    # The plan is model output: the type may be null and columns may be anything.
    analysis_type = str(analysis_plan.get("analysis_type") or "").lower()
    requested_cols = analysis_plan.get("relevant_columns") or []
    if isinstance(requested_cols, str):
        requested_cols = [requested_cols]
    elif not isinstance(requested_cols, list):
        requested_cols = []
    # Drop column names the model invented so indexing cannot raise KeyError.
    existing_cols = [
        col for col in requested_cols
        if isinstance(col, (str, int)) and col in df.columns
    ]
    numeric_cols = list(df.select_dtypes(include=[np.number]).columns)

    def pick_numeric_targets():
        # When the plan named columns, only those are eligible; otherwise any
        # numeric column is.
        if requested_cols:
            return [col for col in existing_cols if col in numeric_cols]
        return numeric_cols

    result = None
    if "sum" in analysis_type or "total" in question_lower:
        target_cols = pick_numeric_targets()
        if target_cols:
            rows = df
            # For sales questions, try to filter out drinks if mentioned
            if "food" in question_lower and "drink" in question_lower:
                category_cols = [
                    col for col in df.columns
                    if "category" in str(col).lower() or "type" in str(col).lower()
                ]
                if category_cols:
                    # astype(str) keeps .str usable on non-string columns.
                    is_drink = df[category_cols[0]].astype(str).str.contains(
                        "drink|beverage", case=False, na=False
                    )
                    rows = df[~is_drink]
            result = rows[target_cols[0]].sum()
    elif "count" in analysis_type or "how many" in question_lower:
        if "unique" in question_lower and existing_cols:
            result = df[existing_cols[0]].nunique()
        else:
            result = len(df)
    elif "average" in analysis_type or "mean" in question_lower:
        target_cols = pick_numeric_targets()
        if target_cols:
            result = df[target_cols[0]].mean()

    # A NaN aggregate is not an answer; let the LLM fallback handle it.
    if isinstance(result, float) and pd.isna(result):
        return None
    return result


def _format_numeric_result(result, question):
    """Render an aggregation result as the short answer string."""
    if not isinstance(result, float):
        return str(result)
    if "USD" in question or "$" in question:
        return f"{result:.2f}"
    if result.is_integer():
        return str(int(result))
    return f"{result:.2f}".rstrip("0").rstrip(".")


# LLM-FIRST DATA ANALYSIS: Let AI understand and analyze data
def DataExtractionNode(state: AgentState) -> AgentState:
    """Answer a question from an attached or linked data file.

    The data comes from the attachment registered under ``state["attachment_id"]``
    or, failing that, from a csv/xls/xlsx/json URL found in the question.
    Python attachments are handed to the LLM for code tracing. Tabular files
    are loaded into a DataFrame; the LLM plans the analysis, simple
    sum/count/average plans are computed directly with pandas, and anything
    else falls back to an LLM answer over a data summary.

    Args:
        state: Agent state; reads ``question`` and ``attachment_id``.

    Returns:
        The same state with ``answer`` set (and ``extracted_data`` when a
        table was analysed). Failures are reported in ``answer`` rather than
        raised.
    """
    try:
        question = state["question"]
        content = None
        file_ext = ""

        # Get the data
        attachment_id = state.get("attachment_id")
        if attachment_id and attachment_id in ATTACHMENTS:
            attachment = ATTACHMENTS[attachment_id]
            content = attachment["content"]
            file_ext = os.path.splitext(attachment["name"])[1][1:].lower()
            print(f"DEBUG: Processing {attachment['name']} ({file_ext})")
        elif "http" in question:
            url_match = re.search(r"https?://\S+\.(csv|xlsx?|json)", question, re.I)
            if url_match:
                content = download_file(url_match.group(0))
                file_ext = url_match.group(1).lower()

        if not content:
            state["answer"] = "No data file available to analyze"
            return state

        # Handle Python files with LLM analysis
        if file_ext == "py":
            code_content = content.decode("utf-8", errors="replace")

            code_analysis_prompt = f"""Analyze this Python code and answer the question:

Question: {question}

Python Code:
```python
{code_content}
```

Instructions:
1. Read through the code carefully
2. Trace the execution step by step
3. Calculate what the final output would be
4. If the code has multiple outputs, identify which one is "final"

{SYSTEM_PROMPT.strip()}"""

            response = client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are a Python code analyst. Trace code execution carefully."},
                    {"role": "user", "content": code_analysis_prompt},
                ],
                max_tokens=400,
                temperature=0.1,
            )

            raw_answer = response.choices[0].message.content
            state["answer"] = extract_final_answer(raw_answer)
            return state

        # Load data files
        df = _load_dataframe(content, file_ext)
        if df is None:
            state["answer"] = f"Unsupported file format: {file_ext}"
            return state

        print(f"DEBUG: Data loaded: {df.shape} rows x columns")
        print(f"DEBUG: Columns: {list(df.columns)}")

        # Step 1: Let LLM understand the data structure and question
        data_preview = df.head(10).to_string()
        data_summary = f"""
Data Shape: {df.shape[0]} rows, {df.shape[1]} columns
Columns: {list(df.columns)}
Data Types: {df.dtypes.to_dict()}

Sample Data (first 10 rows):
{data_preview}

Numeric Summary:
{df.describe().to_string() if len(df.select_dtypes(include=[np.number]).columns) > 0 else "No numeric columns"}
"""

        analysis_planning_prompt = f"""You are a data analyst. Analyze this question and data to determine what analysis is needed.

Question: {question}

Data Summary:
{data_summary}

Instructions:
1. Understand what the question is asking for
2. Identify which columns are relevant
3. Determine what calculations or operations are needed
4. Plan the analysis step by step

Respond in JSON format:
{{
"analysis_type": "sum/count/average/filter/group_by/calculation",
"relevant_columns": ["col1", "col2"],
"steps": ["step 1", "step 2", "step 3"],
"expected_result_type": "number/text/list"
}}"""

        planning_response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": analysis_planning_prompt}],
            max_tokens=300,
            temperature=0.1,
        )

        analysis_plan = _parse_analysis_plan(planning_response.choices[0].message.content)
        print(f"DEBUG: Analysis plan: {analysis_plan}")

        # Step 2: Execute the analysis based on LLM's plan
        result = _run_planned_aggregation(df, question, analysis_plan)

        if result is not None:
            # Step 3: If we got a result, format it properly
            state["answer"] = _format_numeric_result(result, question)
        else:
            # Step 4: Fall back to LLM analysis of the data
            fallback_prompt = f"""You are a data analyst. Answer this question using the provided data.

Question: {question}

Data Summary:
{data_summary}

Instructions:
1. Look at the data structure and understand what each column represents
2. Perform the necessary calculations to answer the question
3. Be precise and show your reasoning
4. If you need to filter, aggregate, or calculate, explain what you're doing

{SYSTEM_PROMPT.strip()}"""

            response = client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are a data analyst. Provide precise answers based on data analysis."},
                    {"role": "user", "content": fallback_prompt},
                ],
                max_tokens=400,
                temperature=0.1,
            )

            raw_answer = response.choices[0].message.content
            state["answer"] = extract_final_answer(raw_answer)

        state["extracted_data"] = f"Analyzed {file_ext} file with {df.shape[0]} rows and {df.shape[1]} columns"

    except Exception as e:
        # Node boundary: report the failure in the answer instead of raising.
        state["answer"] = f"Data analysis error: {str(e)}"
        print(f"DEBUG: Data analysis error: {e}")

    return state
|
| 408 |
+
|
| 409 |
+
# LLM-FIRST CALCULATOR: Let AI understand math problems
|
| 410 |
+
def CalculatorNode(state: AgentState) -> AgentState:
    """Solve the math problem in ``state["question"]`` with a single LLM call.

    Stores the extracted final answer in ``state["answer"]`` and the model's
    full worked reply in ``state["extracted_data"]``. Any failure is reported
    through ``state["answer"]`` instead of being raised.
    """
    try:
        problem = state["question"]

        solver_prompt = f"""You are a mathematical expert. Solve this problem step by step.

Question: {problem}

Instructions:
1. Identify what type of mathematical problem this is
2. Break down the problem into steps
3. Perform the calculations carefully
4. Double-check your work
5. Provide the final numerical answer

If this involves:
- Tables or matrices: analyze the structure and perform the required operations
- Word problems: extract the numbers and operations needed
- Algebraic problems: solve systematically
- Logic problems: work through the logic step by step

Show your work clearly and provide the final answer.

{SYSTEM_PROMPT.strip()}"""

        chat_messages = [
            {"role": "system", "content": "You are a mathematical expert. Solve problems step by step with precision."},
            {"role": "user", "content": solver_prompt},
        ]
        completion = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=chat_messages,
            max_tokens=500,
            temperature=0.1,
        )

        worked_solution = completion.choices[0].message.content
        state["answer"] = extract_final_answer(worked_solution)
        # Keep the full derivation so later nodes can review the reasoning.
        state["extracted_data"] = worked_solution

    except Exception as exc:
        state["answer"] = f"Calculation error: {str(exc)}"

    return state
|
| 454 |
|
| 455 |
+
# LLM-FIRST MULTI-STEP: Let AI plan and execute complex workflows
|
| 456 |
+
def _parse_step_plan(raw_plan):
    """Extract the list of step dicts from the planner's reply, or a default plan."""
    # Fallback: try web search then reasoning
    default_steps = [
        {"step": 1, "action": "web_search", "goal": "research the question"},
        {"step": 2, "action": "reasoning", "goal": "analyze and answer"},
    ]
    if not isinstance(raw_plan, str):
        return default_steps
    # Models frequently wrap JSON in a markdown fence; strip it before parsing.
    cleaned = re.sub(r"^```[a-zA-Z]*\s*|\s*```$", "", raw_plan.strip())
    try:
        plan = json.loads(cleaned)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return default_steps
    steps = plan.get("steps") if isinstance(plan, dict) else None
    if not isinstance(steps, list):
        return default_steps
    # Ignore malformed entries; an empty plan would leave nothing to synthesise.
    usable_steps = [step for step in steps if isinstance(step, dict)]
    return usable_steps or default_steps


# LLM-FIRST MULTI-STEP: Let AI plan and execute complex workflows
def MultiStepNode(state: AgentState) -> AgentState:
    """Plan a multi-step workflow with the LLM, run it, and synthesise an answer.

    The LLM returns a JSON plan. At most the first four steps are executed by
    delegating to ``WebSearchNode``, ``CalculatorNode`` or
    ``DataExtractionNode`` on a copy of the state. Steps with any other action
    (for example "reasoning") produce no intermediate result; the final
    synthesis call covers them.

    Args:
        state: Agent state; reads ``question``.

    Returns:
        The same state with ``answer`` and ``extracted_data`` set. Failures are
        reported in ``answer`` rather than raised.
    """
    try:
        question = state["question"]

        # Step 1: Let LLM create a detailed plan
        planning_prompt = f"""You are a task planning expert. This question requires multiple steps to solve.

Question: {question}

Available tools:
- web_search: can search the internet for information
- data_analysis: can analyze CSV/Excel files
- calculation: can perform mathematical operations
- reasoning: can analyze and synthesize information

Create a detailed step-by-step plan to answer this question:
1. What information do you need to find?
2. What tools should be used in what order?
3. How will you combine the results?

Respond in JSON format:
{{
"steps": [
{{"step": 1, "action": "web_search", "goal": "find specific information", "query": "search query"}},
{{"step": 2, "action": "calculation", "goal": "perform calculation", "operation": "what to calculate"}},
{{"step": 3, "action": "reasoning", "goal": "synthesize results", "method": "how to combine"}}
],
"final_goal": "what the final answer should contain"
}}"""

        planning_response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": planning_prompt}],
            max_tokens=400,
            temperature=0.2,
        )

        steps = _parse_step_plan(planning_response.choices[0].message.content)

        # Step 2: Execute the plan
        step_results = []
        for i, step in enumerate(steps[:4]):  # Limit to 4 steps
            action = step.get("action") or "web_search"
            goal = step.get("goal", "")

            print(f"DEBUG: Executing step {i+1}: {action} - {goal}")

            # Each tool runs on a copy so it cannot clobber the shared state.
            temp_state = state.copy()
            if action == "web_search":
                query = step.get("query")
                if isinstance(query, str) and query.strip():
                    temp_state["question"] = query
                temp_state = WebSearchNode(temp_state)
            elif action == "calculation":
                temp_state = CalculatorNode(temp_state)
            elif action == "data_analysis":
                temp_state = DataExtractionNode(temp_state)
            else:
                # No tool to run, e.g. "reasoning"; handled by the synthesis below.
                continue
            step_results.append(f"Step {i+1} ({action}): {temp_state['answer']}")

        # Step 3: Let LLM synthesize all results
        synthesis_prompt = f"""You are a synthesis expert. Combine these step results to answer the original question.

Original Question: {question}

Step Results:
{chr(10).join(step_results)}

Instructions:
1. Review all the step results
2. Identify which results are most relevant to the original question
3. Combine or calculate as needed to get the final answer
4. Ensure your answer directly addresses the original question

{SYSTEM_PROMPT.strip()}"""

        synthesis_response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are a synthesis expert. Provide precise final answers based on step results."},
                {"role": "user", "content": synthesis_prompt},
            ],
            max_tokens=400,
            temperature=0.1,
        )

        raw_answer = synthesis_response.choices[0].message.content
        state["answer"] = extract_final_answer(raw_answer)
        state["extracted_data"] = f"Multi-step execution: {chr(10).join(step_results)}"

    except Exception as e:
        # Node boundary: report the failure in the answer instead of raising.
        state["answer"] = f"Multi-step error: {str(e)}"

    return state
|
| 566 |
|
| 567 |
+
# KEEP existing media nodes but make them LLM-first too
|
| 568 |
def _sniff_image_mime(content):
    """Guess an image MIME type from its magic bytes, defaulting to JPEG."""
    if content.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if content.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if content[:4] == b"RIFF" and content[8:12] == b"WEBP":
        return "image/webp"
    return "image/jpeg"


def ImageExtractionNode(state: AgentState) -> AgentState:
    """Answer a question about an image with a vision-capable LLM call.

    The image bytes come from the attachment registered under
    ``state["attachment_id"]`` or, failing that, from a jpg/jpeg/png/gif URL
    found in the question.

    Args:
        state: Agent state; reads ``question`` and ``attachment_id``.

    Returns:
        The same state with ``answer`` set. Failures are reported in ``answer``
        rather than raised.
    """
    try:
        content = None

        if state.get("attachment_id") and state["attachment_id"] in ATTACHMENTS:
            content = ATTACHMENTS[state["attachment_id"]]["content"]
        elif "http" in state["question"]:
            url_match = re.search(r"https?://\S+\.(jpg|jpeg|png|gif)", state["question"], re.I)
            if url_match:
                content = download_file(url_match.group(0))

        if not content:
            state["answer"] = "No image available to analyze"
            return state

        base64_image = base64.b64encode(content).decode()
        # Label the data URL with the real format; PNG/GIF inputs were
        # previously always declared as JPEG.
        mime_type = _sniff_image_mime(content)

        # Enhanced prompt for better image analysis
        enhanced_prompt = f"""Analyze this image carefully to answer the question.

Question: {state['question']}

Instructions:
1. Look at the image in detail
2. Identify all relevant elements that relate to the question
3. If this is a chess position, analyze the board state and possible moves
4. If this is a chart/graph, read the data carefully
5. Provide a precise answer based on what you can see

{SYSTEM_PROMPT.strip()}"""

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an expert image analyst. Analyze images carefully and precisely."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": enhanced_prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                        },
                    ],
                },
            ],
            max_tokens=400,
            temperature=0.1,
        )
        raw_answer = response.choices[0].message.content
        state["answer"] = extract_final_answer(raw_answer)

    except Exception as e:
        # Node boundary: report the failure in the answer instead of raising.
        state["answer"] = f"Image analysis error: {str(e)}"

    return state
|
| 625 |
|
|
|
|
| 626 |
def AudioExtractionNode(state: AgentState) -> AgentState:
    """Transcribe an audio attachment with Whisper and answer from the transcript.

    Args:
        state: Agent state; reads ``question`` and ``attachment_id``.

    Returns:
        The same state with ``answer`` set. Failures are reported in ``answer``
        rather than raised.
    """
    try:
        content = None

        # Only look the attachment up when one is actually registered, so a
        # missing or unknown id yields the "no audio" answer, not a KeyError.
        if state.get("attachment_id") and state["attachment_id"] in ATTACHMENTS:
            content = ATTACHMENTS[state["attachment_id"]]["content"]

        if not content:
            state["answer"] = "No audio file available to analyze"
            return state

        # Whisper takes a file path, so spill the bytes to a temp file.
        with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp:
            tmp.write(content)
            tmp.flush()

            # Use whisper to transcribe
            model = whisper.load_model("base")
            result = model.transcribe(tmp.name)
            transcription = result["text"]

        # Enhanced prompt for better audio analysis
        enhanced_prompt = f"""Analyze this audio transcription to answer the question.

Question: {state['question']}

Audio Transcription:
{transcription}

Instructions:
1. Read through the transcription carefully
2. Extract the specific information requested in the question
3. If looking for ingredients, list only the ingredients mentioned
4. If looking for page numbers, extract only the numbers
5. Format your answer according to the question requirements

{SYSTEM_PROMPT.strip()}"""

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an expert at analyzing audio transcriptions. Extract precise information."},
                {"role": "user", "content": enhanced_prompt},
            ],
            max_tokens=400,
            temperature=0.1,
        )
        raw_answer = response.choices[0].message.content
        state["answer"] = extract_final_answer(raw_answer)

    except Exception as e:
        # Node boundary: report the failure in the answer instead of raising.
        state["answer"] = f"Audio processing error: {str(e)}"

    return state
|
| 680 |
|
|
|
|
| 681 |
def _find_youtube_url(text):
    """Return a watch URL for the first YouTube link in *text*, or None."""
    # Canonical form first: the matched URL is passed through unchanged.
    canonical = re.search(r"https?://www\.youtube\.com/watch\?v=[a-zA-Z0-9_-]+", text)
    if canonical:
        return canonical.group(0)
    # Other common shapes: youtu.be short links, no-www/mobile hosts,
    # shorts/embed/live paths, and v= not being the first query parameter.
    variant = re.search(
        r"https?://(?:(?:www\.|m\.|music\.)?youtube\.com/"
        r"(?:watch\?(?:\S*?&)?v=|shorts/|embed/|live/)"
        r"|youtu\.be/)([a-zA-Z0-9_-]+)",
        text,
    )
    if variant:
        return f"https://www.youtube.com/watch?v={variant.group(1)}"
    return None


def VideoExtractionNode(state: AgentState) -> AgentState:
    """Answer a question about a YouTube video from its transcript.

    Args:
        state: Agent state; reads ``question``, which must contain the video URL.

    Returns:
        The same state with ``answer`` set. Failures are reported in ``answer``
        rather than raised.
    """
    try:
        video_url = _find_youtube_url(state["question"])

        if video_url:
            transcript = get_youtube_transcript(video_url)

            if not transcript:
                # Try alternative transcript methods or fallback
                state["answer"] = "Video transcript not available"
                return state

            # Enhanced prompt for better video analysis
            enhanced_prompt = f"""Analyze this video transcript to answer the question.

Question: {state['question']}

Video Transcript:
{transcript}

Instructions:
1. Read through the entire transcript carefully
2. Look for the specific information requested
3. If looking for dialogue or quotes, find the exact words
4. If counting elements, go through systematically
5. Provide the precise answer requested

{SYSTEM_PROMPT.strip()}"""

            response = client.chat.completions.create(
                model="gpt-4-turbo",
                messages=[
                    {"role": "system", "content": "You are an expert at analyzing video content. Extract precise information from transcripts."},
                    {"role": "user", "content": enhanced_prompt},
                ],
                max_tokens=400,
                temperature=0.1,
            )
            raw_answer = response.choices[0].message.content
            state["answer"] = extract_final_answer(raw_answer)
        else:
            state["answer"] = "No valid YouTube URL found"

    except Exception as e:
        # Node boundary: report the failure in the answer instead of raising.
        state["answer"] = f"Video processing error: {str(e)}"

    return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
|
| 731 |
+
# Keep the existing AnswerRefinementNode - it's already LLM-first
|
| 732 |
+
def AnswerRefinementNode(state: AgentState) -> AgentState:
    """Ask the LLM to review and reformat the current answer.

    The model sees the original question, the answer produced so far and any
    ``extracted_data`` context, and must finish with a ``FINAL ANSWER:`` line,
    which replaces ``state["answer"]``.

    If the refinement call itself fails, the previous answer is kept: an
    unrefined answer may still be right, whereas an error string never is.
    Only when there was no previous answer is the error written to ``answer``.

    Args:
        state: Agent state; reads ``question``, ``answer`` and ``extracted_data``.

    Returns:
        The same state with ``answer`` refined, or left as it was on failure.
    """
    # Captured before the try block so the except path can restore it.
    initial_answer = state.get("answer", "")
    try:
        question = state["question"]
        extracted_data = state.get("extracted_data", "")
        context_text = extracted_data if extracted_data else "No specific data was extracted, the answer was generated based on general knowledge or initial processing."

        refinement_prompt = f"""
Original Question: {question}
Initial Answer: {initial_answer}
Extracted Context/Data: {context_text}

Your task is to critically review the Initial Answer in the context of the Original Question and Extracted Context/Data.
Refine the Initial Answer to ensure it is accurate, directly answers the question, and strictly follows the FINAL ANSWER formatting rules.
If the Initial Answer seems correct and appropriately formatted, you can simply re-state it.
If the Initial Answer is "unknown" or an error message, try to re-evaluate the question using the available context to provide a valid answer if possible.

Strict FINAL ANSWER formatting rules:
- A number OR
- As few words as possible OR
- A comma separated list of numbers and/or strings

Specific formatting rules:
1. For numbers:
- Don't use commas (e.g., 1000000 not 1,000,000)
- Don't include units ($, %, etc.) unless specified
2. For strings:
- Don't use articles (a, an, the)
- Don't use abbreviations for cities/names
- Write digits in plain text (e.g., "two" instead of "2")
3. For comma-separated lists:
- Apply the above rules to each element
- Separate elements with commas only (no spaces unless part of the element)

Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
"""

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": refinement_prompt},
            ],
            max_tokens=300,
            temperature=0.1,
        )
        refined_raw_answer = response.choices[0].message.content
        state["answer"] = extract_final_answer(refined_raw_answer)
        print(f" Refinement Node: Initial Answer - '{initial_answer}', Refined Answer - '{state['answer']}'")
    except Exception as e:
        print(f" Refinement Node Error: {e}")
        if initial_answer:
            state["answer"] = initial_answer
        else:
            state["answer"] = f"Refinement error: {str(e)}"
    return state
|
check_env 2.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Diagnostic script: check that OPENAI_API_KEY loads from the project's .env file."""
import os
from pathlib import Path
from dotenv import load_dotenv

# Define project root (assuming check_env.py is in the root)
PROJECT_ROOT = Path(__file__).resolve().parent

print(f"Attempting to load .env from: {PROJECT_ROOT / '.env'}")
load_dotenv(dotenv_path=PROJECT_ROOT / ".env")

api_key = os.getenv("OPENAI_API_KEY")

if api_key:
    # Show only a short prefix and the length: enough to confirm which key is
    # loaded without writing reusable key material to logs.
    print(f"OPENAI_API_KEY successfully loaded: {api_key[:5]}... ({len(api_key)} chars)")
else:
    print("OPENAI_API_KEY not found or empty after loading .env.")

# List variable names only; the values can contain secrets (this key included).
print(f"Environment variable names: {', '.join(sorted(os.environ))}")
|
requirements 2.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=1.0
|
| 2 |
+
pandas
|
| 3 |
+
numpy<2
|
| 4 |
+
wikipedia
|
| 5 |
+
chardet
|
| 6 |
+
openai-whisper
|
| 7 |
+
youtube-transcript-api
|
| 8 |
+
langgraph>=0.0.34
|
| 9 |
+
gradio==4.44.1
|
| 10 |
+
huggingface-hub
|
| 11 |
+
requests
|
| 12 |
+
tiktoken
|
| 13 |
+
python-magic
|
| 14 |
+
openpyxl
|
| 15 |
+
tabulate
|
| 16 |
+
langchain
|
| 17 |
+
openai-whisper
|
| 18 |
+
requests
|
| 19 |
+
python-dotenv
|
| 20 |
+
gradio[oauth]
|
| 21 |
+
duckduckgo-search
|
requirements_backup.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=1.0
|
| 2 |
+
pandas
|
| 3 |
+
numpy<2
|
| 4 |
+
wikipedia
|
| 5 |
+
chardet
|
| 6 |
+
openai-whisper
|
| 7 |
+
youtube-transcript-api
|
| 8 |
+
langgraph>=0.0.34
|
| 9 |
+
gradio==4.44.1
|
| 10 |
+
huggingface-hub
|
| 11 |
+
requests
|
| 12 |
+
tiktoken
|
| 13 |
+
python-magic
|
| 14 |
+
openpyxl
|
| 15 |
+
tabulate
|
| 16 |
+
langchain
|
| 17 |
+
openai-whisper
|
| 18 |
+
requests
|
| 19 |
+
python-dotenv
|
| 20 |
+
gradio[oauth]
|
| 21 |
+
duckduckgo-search
|
| 22 |
+
python-dotenv
|
| 23 |
+
requests
|
| 24 |
+
sympy
|
| 25 |
+
langchain-openai
|
tests/test_agent.py
CHANGED
|
@@ -129,14 +129,13 @@ def download_gaia_attachment_local(task_id: str):
|
|
| 129 |
return None
|
| 130 |
|
| 131 |
|
|
|
|
|
|
|
| 132 |
def run_local_agent_test():
|
| 133 |
-
"""
|
| 134 |
-
Runs the agent on downloaded GAIA questions and prints the results.
|
| 135 |
-
Does NOT submit answers to the scoring server.
|
| 136 |
-
"""
|
| 137 |
setup_test_environment()
|
| 138 |
|
| 139 |
-
#
|
| 140 |
questions = []
|
| 141 |
if QUESTIONS_FILE.exists():
|
| 142 |
with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
|
|
@@ -157,21 +156,28 @@ def run_local_agent_test():
|
|
| 157 |
for i, q in enumerate(questions):
|
| 158 |
print(f"\n--- Processing Question {i+1}/{len(questions)} (Task ID: {q['task_id']}) ---")
|
| 159 |
|
| 160 |
-
# Reset ATTACHMENTS for each question
|
| 161 |
ATTACHMENTS.clear()
|
| 162 |
|
|
|
|
| 163 |
attachment_id_for_state = None
|
| 164 |
-
if q.get("
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
else:
|
| 167 |
-
print(f"
|
| 168 |
|
| 169 |
initial_state = AgentState(
|
| 170 |
question=q["question"],
|
| 171 |
answer="",
|
| 172 |
extracted_data="",
|
| 173 |
media_type="",
|
| 174 |
-
attachment_id=attachment_id_for_state,
|
| 175 |
task_id=q["task_id"],
|
| 176 |
)
|
| 177 |
|
|
@@ -184,9 +190,10 @@ def run_local_agent_test():
|
|
| 184 |
"task_id": q["task_id"],
|
| 185 |
"question": q["question"],
|
| 186 |
"predicted_answer": predicted_answer,
|
|
|
|
| 187 |
})
|
| 188 |
-
print(f"\n Question: {q['question']}")
|
| 189 |
-
print(f" Agent's
|
| 190 |
|
| 191 |
except Exception as e:
|
| 192 |
error_msg = f"ERROR: Agent failed to process question {q['task_id']}: {e}"
|
|
@@ -195,20 +202,28 @@ def run_local_agent_test():
|
|
| 195 |
"task_id": q["task_id"],
|
| 196 |
"question": q["question"],
|
| 197 |
"predicted_answer": error_msg,
|
|
|
|
| 198 |
})
|
| 199 |
|
| 200 |
print("\n" + "="*50)
|
| 201 |
print("Local Agent Test Run Summary")
|
| 202 |
print("="*50 + "\n")
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
|
|
|
| 129 |
return None
|
| 130 |
|
| 131 |
|
| 132 |
+
# Smart attachment handling in your test_agent.py
|
| 133 |
+
|
| 134 |
def run_local_agent_test():
|
| 135 |
+
"""Runs the agent with smart attachment handling"""
|
|
|
|
|
|
|
|
|
|
| 136 |
setup_test_environment()
|
| 137 |
|
| 138 |
+
# Load questions
|
| 139 |
questions = []
|
| 140 |
if QUESTIONS_FILE.exists():
|
| 141 |
with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
|
|
|
|
| 156 |
for i, q in enumerate(questions):
|
| 157 |
print(f"\n--- Processing Question {i+1}/{len(questions)} (Task ID: {q['task_id']}) ---")
|
| 158 |
|
| 159 |
+
# Reset ATTACHMENTS for each question
|
| 160 |
ATTACHMENTS.clear()
|
| 161 |
|
| 162 |
+
# SMART: Only download if file_name is not empty
|
| 163 |
attachment_id_for_state = None
|
| 164 |
+
if q.get("file_name", "").strip():
|
| 165 |
+
print(f" File available: {q['file_name']}")
|
| 166 |
+
print(f" Attempting download from: {ATTACHMENT_BASE_URL}{q['task_id']}")
|
| 167 |
+
attachment_id_for_state = download_gaia_attachment_local(q["task_id"])
|
| 168 |
+
if attachment_id_for_state:
|
| 169 |
+
print(f" ✅ Downloaded: {ATTACHMENTS[attachment_id_for_state]['name']}")
|
| 170 |
+
else:
|
| 171 |
+
print(f" ❌ Download failed")
|
| 172 |
else:
|
| 173 |
+
print(f" No attachment for this question")
|
| 174 |
|
| 175 |
initial_state = AgentState(
|
| 176 |
question=q["question"],
|
| 177 |
answer="",
|
| 178 |
extracted_data="",
|
| 179 |
media_type="",
|
| 180 |
+
attachment_id=attachment_id_for_state,
|
| 181 |
task_id=q["task_id"],
|
| 182 |
)
|
| 183 |
|
|
|
|
| 190 |
"task_id": q["task_id"],
|
| 191 |
"question": q["question"],
|
| 192 |
"predicted_answer": predicted_answer,
|
| 193 |
+
"has_file": bool(q.get("file_name", "").strip()),
|
| 194 |
})
|
| 195 |
+
print(f"\n Question: {q['question'][:100]}...")
|
| 196 |
+
print(f" Agent's Answer: {predicted_answer}")
|
| 197 |
|
| 198 |
except Exception as e:
|
| 199 |
error_msg = f"ERROR: Agent failed to process question {q['task_id']}: {e}"
|
|
|
|
| 202 |
"task_id": q["task_id"],
|
| 203 |
"question": q["question"],
|
| 204 |
"predicted_answer": error_msg,
|
| 205 |
+
"has_file": bool(q.get("file_name", "").strip()),
|
| 206 |
})
|
| 207 |
|
| 208 |
print("\n" + "="*50)
|
| 209 |
print("Local Agent Test Run Summary")
|
| 210 |
print("="*50 + "\n")
|
| 211 |
+
|
| 212 |
+
# Categorize results
|
| 213 |
+
with_files = [r for r in results if r["has_file"]]
|
| 214 |
+
without_files = [r for r in results if not r["has_file"]]
|
| 215 |
+
|
| 216 |
+
print(f"Questions with files: {len(with_files)}")
|
| 217 |
+
print(f"Questions without files: {len(without_files)}")
|
| 218 |
+
print()
|
| 219 |
+
|
| 220 |
+
for res in results:
|
| 221 |
+
file_indicator = "📎" if res["has_file"] else "💬"
|
| 222 |
+
print(f"{file_indicator} Task ID: {res['task_id']}")
|
| 223 |
+
print(f" Question: {res['question'][:80]}...")
|
| 224 |
+
print(f" Answer: {res['predicted_answer']}\n")
|
| 225 |
+
|
| 226 |
+
print("\n--- Local Test Complete ---")
|
| 227 |
|
| 228 |
|
| 229 |
if __name__ == "__main__":
|