Files changed (1) hide show
  1. app.py +369 -86
app.py CHANGED
@@ -1,34 +1,329 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,15 +333,15 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
 
51
  # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
@@ -55,51 +350,59 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
- except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
- except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
 
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
82
  try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
89
 
90
  if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
99
- # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
104
  result_data = response.json()
105
  final_status = (
@@ -117,22 +420,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
117
  try:
118
  error_json = e.response.json()
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
  status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
124
  results_df = pd.DataFrame(results_log)
125
  return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
  except Exception as e:
137
  status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
@@ -140,57 +433,47 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
140
  return status_message, results_df
141
 
142
 
143
- # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
- **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
 
 
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
  run_button.click(
170
  fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
- if space_host_startup:
181
- print(f"✅ SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
- else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"✅ SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
- else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
 
 
 
 
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
196
  demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ import tempfile
6
+ from smolagents import (
7
+ CodeAgent,
8
+ DuckDuckGoSearchTool,
9
+ LiteLLMModel,
10
+ HfApiModel,
11
+ tool,
12
+ )
13
 
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
+ # --- GAIA System Prompt (adapted from official GAIA benchmark) ---
18
+ GAIA_SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
19
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
20
+ If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
21
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
22
+ If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
23
+
24
+ IMPORTANT: Your FINAL ANSWER must be precise and concise. No extra words, no explanations after it.
25
+ """
26
+
27
+ # --- Custom Tools ---
28
+
29
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its text content.
    Use this to read articles, documentation, or any web page.

    Args:
        url: The full URL of the webpage to visit (must start with http:// or https://).
    """
    # NOTE: the docstring above is consumed by the `@tool` decorator, so
    # maintainer notes live in comments instead.
    import re

    # Cap on returned characters so a single page cannot flood the model context.
    max_chars = 15000

    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        html = response.text

        # Prefer markdownify when installed; otherwise strip tags by hand.
        try:
            from markdownify import markdownify
            text = markdownify(html, heading_style="ATX")
        except ImportError:
            # Each pass must consume the output of the previous one. The
            # original re-read the raw HTML in the <style> pass, which threw
            # away the <script> removal and leaked JavaScript into the text.
            strip_flags = re.DOTALL | re.IGNORECASE
            text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=strip_flags)
            text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=strip_flags)
            text = re.sub(r'<[^>]+>', ' ', text)
            text = re.sub(r'\s+', ' ', text).strip()

        # Truncate to avoid token limits
        if len(text) > max_chars:
            text = text[:max_chars] + "\n\n[Content truncated - page too long]"
        return text
    except Exception as e:
        # Tools report failures as text so the agent can react instead of crashing.
        return f"Error visiting {url}: {str(e)}"
62
+
63
+
64
def _guess_extension(content_disp: str, content_type: str) -> str:
    """Choose a temp-file suffix from response headers, defaulting to ".bin"."""
    import re

    match = re.search(r'filename="?([^";\s]+)"?', content_disp)
    if match:
        _, dot, tail = match.group(1).rpartition(".")
        # Accept only a plain alphanumeric extension: the value becomes part of
        # a local file name, so path separators and similar must not leak in.
        if dot and tail.isalnum():
            return "." + tail

    # Fall back to the content type. This also covers a filename that carries
    # no usable extension, which previously always produced ".bin".
    lowered = content_type.lower()
    suffix_by_type = (
        ("text/plain", ".txt"),
        ("text/csv", ".csv"),
        ("application/json", ".json"),
        ("audio", ".mp3"),
        ("image/png", ".png"),
        ("image/jpeg", ".jpg"),
        ("application/pdf", ".pdf"),
        ("spreadsheet", ".xlsx"),
        ("excel", ".xlsx"),
    )
    for needle, suffix in suffix_by_type:
        if needle in lowered:
            return suffix
    return ".bin"


@tool
def download_gaia_file(task_id: str) -> str:
    """Downloads a file associated with a GAIA task and returns the local file path.
    Use this when a question references an attached file that you need to read or analyze.

    Args:
        task_id: The task_id string for the GAIA question that has an associated file.
    """
    # NOTE: the docstring above is consumed by the `@tool` decorator, so
    # maintainer notes live in comments instead.
    from urllib.parse import quote

    try:
        api_url = DEFAULT_API_URL
        # The id is supplied by the model; trim stray whitespace and escape it
        # so it cannot alter the request path.
        file_url = f"{api_url}/files/{quote(task_id.strip(), safe='')}"
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        # Determine file extension from content-disposition or content-type
        content_disp = response.headers.get("content-disposition", "")
        content_type = response.headers.get("content-type", "")
        ext = _guess_extension(content_disp, content_type)

        # delete=False: the file must outlive this call so other tools can open
        # it. The context manager closes the handle even if the write fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp.write(response.content)
            saved_path = tmp.name

        return f"File downloaded to: {saved_path} (type: {content_type}, size: {len(response.content)} bytes)"
    except Exception as e:
        # Tools report failures as text so the agent can react instead of crashing.
        return f"Error downloading file for task {task_id}: {str(e)}"
115
+
116
+
117
@tool
def read_file_content(file_path: str) -> str:
    """Reads and returns the content of a local file.
    Supports text files (.txt, .csv, .json, .py, .md), and attempts to read Excel/PDF files.

    Args:
        file_path: The absolute path to the file to read.
    """
    # NOTE: the docstring above is consumed by the `@tool` decorator, so
    # maintainer notes live in comments instead.

    # Cap on returned characters, applied to every format so no file type can
    # flood the model context (previously only plain-text files were capped).
    max_chars = 20000
    # Number of bytes sniffed from files with an unrecognised extension.
    sniff_bytes = 5000

    def _clip(text: str) -> str:
        if len(text) > max_chars:
            return text[:max_chars] + "\n\n[Content truncated]"
        return text

    try:
        # splitext looks only at the final path component, so a dotted
        # directory name (e.g. /tmp/run.v2/data) is not mistaken for an extension.
        ext = os.path.splitext(file_path)[1].lstrip(".").lower()

        if ext in ("txt", "csv", "json", "py", "md", "html", "xml", "log", "tsv"):
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                # One character past the cap is enough to detect truncation
                # without loading an arbitrarily large file.
                return _clip(f.read(max_chars + 1))

        elif ext in ("xlsx", "xls"):
            try:
                import openpyxl
            except ImportError:
                return "openpyxl not available. Cannot read Excel files."
            # If openpyxl rejects the file (legacy .xls, presumably — not
            # verified here), the error is reported by the outer handler.
            wb = openpyxl.load_workbook(file_path, data_only=True)
            try:
                result = []
                for sheet_name in wb.sheetnames:
                    ws = wb[sheet_name]
                    result.append(f"=== Sheet: {sheet_name} ===")
                    for row in ws.iter_rows(values_only=True):
                        result.append("\t".join(str(c) if c is not None else "" for c in row))
            finally:
                wb.close()
            return _clip("\n".join(result))

        elif ext == "pdf":
            try:
                import PyPDF2
            except ImportError:
                return "PyPDF2 not available. Cannot read PDF files."
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                pages = [page.extract_text() or "" for page in reader.pages]
            return _clip("\n".join(pages))

        else:
            # Try reading as binary and decode
            with open(file_path, "rb") as f:
                raw = f.read(sniff_bytes)
            try:
                return raw.decode("utf-8")
            except UnicodeDecodeError as err:
                # A multi-byte character cut in half by the fixed-size read is
                # not evidence of binary data; keep the cleanly decoded prefix.
                if len(raw) == sniff_bytes and err.start >= len(raw) - 3:
                    return raw[:err.start].decode("utf-8")
                return f"Binary file ({ext}), size: {os.path.getsize(file_path)} bytes. Cannot display as text."

    except Exception as e:
        # Tools report failures as text so the agent can react instead of crashing.
        return f"Error reading file {file_path}: {str(e)}"
169
+
170
+
171
@tool
def wikipedia_search(query: str) -> str:
    """Searches Wikipedia and returns a summary of the most relevant article.
    Useful for factual questions about people, places, events, science, etc.

    Args:
        query: The search query to look up on Wikipedia.
    """
    # NOTE: the docstring above is consumed by the `@tool` decorator and is
    # therefore kept verbatim; maintainer notes live in comments.
    summary_limit = 5000

    try:
        import wikipedia

        candidate_titles = wikipedia.search(query, results=3)
        if not candidate_titles:
            return f"No Wikipedia results found for: {query}"

        # Walk the hits in ranked order and return the first one that loads.
        for title in candidate_titles:
            try:
                article = wikipedia.page(title, auto_suggest=False)
                blurb = article.summary
            except (wikipedia.DisambiguationError, wikipedia.PageError):
                # Ambiguous or missing page: fall through to the next hit.
                continue

            if len(blurb) > summary_limit:
                blurb = blurb[:summary_limit] + "..."
            return f"Wikipedia: {article.title}\n\n{blurb}\n\nURL: {article.url}"

        return f"Could not retrieve Wikipedia page for: {query}"
    except Exception as e:
        # Any other failure (import, network, ...) is reported as text.
        return f"Error searching Wikipedia: {str(e)}"
199
+
200
+
201
@tool
def perform_calculation(python_expression: str) -> str:
    """Evaluates a mathematical Python expression and returns the result.
    Use this for arithmetic, mathematical calculations, date computations, etc.

    Args:
        python_expression: A valid Python expression to evaluate (e.g. '2**10', 'round(3.14159, 2)', 'sum(range(1,101))').
    """
    # NOTE: the docstring above is consumed by the `@tool` decorator, so
    # maintainer notes live in comments instead.
    try:
        import math
        import datetime

        # NOTE(security): eval() on model-generated text is NOT a sandbox.
        # Emptying __builtins__ alone can be bypassed through dunder attribute
        # chains (e.g. ().__class__.__mro__ ...). Rejecting "__" closes that
        # common route, but this must still be treated as best-effort only.
        if "__" in python_expression:
            return "Calculation error: double-underscore names are not allowed"

        # Whitelist of names visible to the expression.
        allowed_globals = {
            "__builtins__": {},
            "math": math,
            "abs": abs, "round": round, "min": min, "max": max,
            "sum": sum, "len": len, "sorted": sorted,
            "int": int, "float": float, "str": str,
            "list": list, "range": range, "enumerate": enumerate,
            "zip": zip, "map": map, "filter": filter,
            "pow": pow, "divmod": divmod,
            "datetime": datetime,
        }
        result = eval(python_expression, allowed_globals)
        return str(result)
    except Exception as e:
        # Syntax and runtime errors are returned as text for the agent to read.
        return f"Calculation error: {str(e)}"
227
+
228
+
229
+ # --- Agent Class ---
230
+ class GaiaAgent:
231
  def __init__(self):
232
+ print("Initializing GaiaAgent...")
233
+
234
+ # Choose model - try LiteLLM with Anthropic first, fall back to HfApiModel
235
+ anthropic_key = os.getenv("ANTHROPIC_API_KEY")
236
+ openai_key = os.getenv("OPENAI_API_KEY")
237
+ hf_token = os.getenv("HF_TOKEN")
238
+
239
+ if anthropic_key:
240
+ print("Using Anthropic Claude via LiteLLM")
241
+ self.model = LiteLLMModel(
242
+ model_id="anthropic/claude-sonnet-4-20250514",
243
+ api_key=anthropic_key,
244
+ max_tokens=4096,
245
+ temperature=0.1,
246
+ )
247
+ elif openai_key:
248
+ print("Using OpenAI via LiteLLM")
249
+ self.model = LiteLLMModel(
250
+ model_id="openai/gpt-4o",
251
+ api_key=openai_key,
252
+ max_tokens=4096,
253
+ temperature=0.1,
254
+ )
255
+ else:
256
+ print("Using HfApiModel (free inference)")
257
+ self.model = HfApiModel(
258
+ model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
259
+ token=hf_token,
260
+ )
261
+
262
+ # Build tools list
263
+ self.tools = [
264
+ DuckDuckGoSearchTool(),
265
+ visit_webpage,
266
+ download_gaia_file,
267
+ read_file_content,
268
+ wikipedia_search,
269
+ perform_calculation,
270
+ ]
271
+
272
+ # Create agent
273
+ self.agent = CodeAgent(
274
+ model=self.model,
275
+ tools=self.tools,
276
+ max_steps=12,
277
+ verbosity_level=1,
278
+ additional_authorized_imports=[
279
+ "json", "re", "math", "datetime", "collections",
280
+ "itertools", "functools", "statistics", "string",
281
+ "csv", "io", "os", "unicodedata",
282
+ ],
283
+ )
284
+
285
+ print("GaiaAgent initialized successfully.")
286
+
287
+ def __call__(self, question: str, task_id: str = None) -> str:
288
+ """Run the agent on a question and extract the final answer."""
289
+ print(f"\n{'='*60}")
290
+ print(f"Question (task_id={task_id}): {question[:100]}...")
291
+ print(f"{'='*60}")
292
+
293
+ # Build the prompt with task context
294
+ prompt = GAIA_SYSTEM_PROMPT + "\n\n"
295
+ if task_id:
296
+ prompt += f"[Note: This question has task_id='{task_id}'. If the question references an attached file, use the download_gaia_file tool with this task_id to get it.]\n\n"
297
+ prompt += f"Question: {question}"
298
+
299
+ try:
300
+ raw_answer = self.agent.run(prompt)
301
+ answer = str(raw_answer).strip()
302
+
303
+ # Extract just the final answer if the agent included "FINAL ANSWER:"
304
+ if "FINAL ANSWER:" in answer:
305
+ answer = answer.split("FINAL ANSWER:")[-1].strip()
306
+
307
+ # Clean up common artifacts
308
+ answer = answer.strip('"\'').strip()
309
+
310
+ print(f"Agent answer: {answer}")
311
+ return answer
312
+
313
+ except Exception as e:
314
+ print(f"Agent error: {e}")
315
+ return f"Error: {str(e)}"
316
+
317
+
318
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
319
  """
320
+ Fetches all questions, runs the GaiaAgent on them, submits all answers,
321
  and displays the results.
322
  """
323
+ space_id = os.getenv("SPACE_ID")
 
324
 
325
  if profile:
326
+ username = f"{profile.username}"
327
  print(f"User logged in: {username}")
328
  else:
329
  print("User not logged in.")
 
333
  questions_url = f"{api_url}/questions"
334
  submit_url = f"{api_url}/submit"
335
 
336
+ # 1. Instantiate Agent
337
  try:
338
+ agent = GaiaAgent()
339
  except Exception as e:
340
  print(f"Error instantiating agent: {e}")
341
  return f"Error initializing agent: {e}", None
342
+
343
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
344
+ print(f"Agent code URL: {agent_code}")
345
 
346
  # 2. Fetch Questions
347
  print(f"Fetching questions from: {questions_url}")
 
350
  response.raise_for_status()
351
  questions_data = response.json()
352
  if not questions_data:
353
+ return "Fetched questions list is empty or invalid format.", None
 
354
  print(f"Fetched {len(questions_data)} questions.")
355
+ except Exception as e:
356
  print(f"Error fetching questions: {e}")
357
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
358
 
359
+ # 3. Run Agent on each question
360
  results_log = []
361
  answers_payload = []
362
  print(f"Running agent on {len(questions_data)} questions...")
363
+
364
+ for i, item in enumerate(questions_data):
365
  task_id = item.get("task_id")
366
  question_text = item.get("question")
367
  if not task_id or question_text is None:
368
  print(f"Skipping item with missing task_id or question: {item}")
369
  continue
370
+
371
+ print(f"\n--- Question {i+1}/{len(questions_data)} (task_id: {task_id}) ---")
372
  try:
373
+ submitted_answer = agent(question_text, task_id=task_id)
374
+ answers_payload.append({
375
+ "task_id": task_id,
376
+ "submitted_answer": submitted_answer,
377
+ })
378
+ results_log.append({
379
+ "Task ID": task_id,
380
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
381
+ "Submitted Answer": submitted_answer,
382
+ })
383
  except Exception as e:
384
+ print(f"Error running agent on task {task_id}: {e}")
385
+ results_log.append({
386
+ "Task ID": task_id,
387
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
388
+ "Submitted Answer": f"AGENT ERROR: {e}",
389
+ })
390
 
391
  if not answers_payload:
 
392
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
393
 
394
+ # 4. Submit
395
+ submission_data = {
396
+ "username": username.strip(),
397
+ "agent_code": agent_code,
398
+ "answers": answers_payload,
399
+ }
400
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
401
  print(status_update)
402
 
 
403
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
404
  try:
405
+ response = requests.post(submit_url, json=submission_data, timeout=120)
406
  response.raise_for_status()
407
  result_data = response.json()
408
  final_status = (
 
420
  try:
421
  error_json = e.response.json()
422
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
423
+ except Exception:
424
  error_detail += f" Response: {e.response.text[:500]}"
425
  status_message = f"Submission Failed: {error_detail}"
426
  print(status_message)
427
  results_df = pd.DataFrame(results_log)
428
  return status_message, results_df
 
 
 
 
 
 
 
 
 
 
429
  except Exception as e:
430
  status_message = f"An unexpected error occurred during submission: {e}"
431
  print(status_message)
 
433
  return status_message, results_df
434
 
435
 
436
# --- Gradio Interface ---
# Module-level UI definition; `demo` is what the `__main__` guard launches.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Benchmark Agent")
    # NOTE(review): the text below hard-codes "20 questions", while the handler
    # iterates over however many items the questions endpoint returns.
    gr.Markdown(
        """
        **Agent powered by smolagents + CodeAgent**

        This agent uses web search, Wikipedia, file handling, and code execution to answer GAIA benchmark questions.

        **Instructions:**
        1. Log in to your Hugging Face account below.
        2. Click 'Run Evaluation & Submit All Answers' to start.
        3. The agent will process all 20 questions (this may take several minutes).

        **Tools available:** DuckDuckGo Search, Web Page Reader, Wikipedia, File Download/Reader, Python Calculator
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")

    # The handler returns a (status message, results table or None) pair,
    # which maps onto these two components in order.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are wired here although run_and_submit_all takes a `profile`
    # parameter; presumably Gradio injects the OAuth profile from the login
    # session based on the type hint — TODO confirm against the Gradio docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
466
if __name__ == "__main__":
    # Startup banner: the title centred in a 74-character rule (30 dashes each side).
    print("\n" + " App Starting ".center(74, "-"))

    # These variables are only reported when set; nothing is printed otherwise.
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
    if space_id:
        print(
            f"✅ SPACE_ID: {space_id}",
            f" Repo: https://huggingface.co/spaces/{space_id}/tree/main",
            sep="\n",
        )

    print("-" * 60, "Launching Gradio Interface...", sep="\n")
    demo.launch(debug=True, share=False)