import os
import time
import traceback

import gradio as gr
import pandas as pd
import requests

# --- Configuration -----------------------------------------------------------
# Settings are read from environment variables, falling back to the defaults
# given here. HF_TOKEN has no default and must be supplied.
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Fail fast at import time instead of on the first request. SPACE_ID has a
# default, so it is only falsy when explicitly set to an empty string.
if not HF_TOKEN or not SPACE_ID:
    raise RuntimeError(
        "❌ Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

# Bearer-token header reused by the authenticated requests in this module.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Markdown banner rendered at the top of the Gradio page.
WELCOME = """
## GAIA Benchmark Runner

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""

class GAIAAgent:
    """Minimal agent that answers a prompt through the HF Inference API."""

    def __init__(self, model_id: str):
        """Store the target model id and the module-level auth headers.

        Args:
            model_id: Model identifier interpolated into the inference URL.
        """
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS

    @staticmethod
    def _extract_text(data) -> str:
        """Return the generated text from a decoded API response.

        The expected shape is a non-empty list whose first element is a dict
        holding a string under ``"generated_text"``. Anything else (an error
        dict, an empty list, unexpected element types) is returned as
        ``str(data)`` so the caller still gets something displayable.
        """
        if isinstance(data, list) and data:
            first = data[0]
            if isinstance(first, dict):
                text = first.get("generated_text")
                if isinstance(text, str):
                    return text.strip()
        return str(data)

    def answer(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return its stripped completion.

        Args:
            prompt: Text passed as the ``inputs`` field of the request.

        Returns:
            The generated text, or the stringified response body when it does
            not have the expected shape.

        Raises:
            requests.HTTPError: If the API answers with an error status
                (raised by ``raise_for_status``).
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
                # Text-generation endpoints echo the prompt in
                # ``generated_text`` by default; request the completion only
                # so the question is not returned as part of the answer.
                "return_full_text": False,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        return self._extract_text(resp.json())

def run_and_submit_all():
    """Answer every benchmark question and submit the answers for scoring.

    Steps: resolve the Hugging Face username behind the token, download the
    question list from ``API_URL``, run ``GAIAAgent`` on each question, then
    POST all answers to the scoring endpoint.

    Returns:
        tuple[str, pandas.DataFrame]: A Markdown status message and a table
        with one row per answered task (``Task ID``, ``Question``,
        ``Answer``). On any failure the table is empty and the status
        describes the problem; exceptions are never propagated to the caller.
    """
    try:
        # 1) Identify the user who owns the token.
        who = requests.get(
            "https://huggingface.co/api/whoami-v2", headers=HEADERS, timeout=10
        )
        who.raise_for_status()
        info = who.json()
        # whoami-v2 reports the account under the top-level "name" key; the
        # nested "user.username" lookup is kept only as a fallback.
        legacy = info.get("user")
        username = info.get("name") or (
            legacy.get("username") if isinstance(legacy, dict) else None
        )
        if not username:
            return "❌ Could not fetch your HF username. Check your token.", pd.DataFrame()

        # 2) Fetch the questions.
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

        # 3) Run the agent on every question.
        agent = GAIAAgent(MODEL_ID)
        results = []
        payload = []
        for task in questions:
            tid = task.get("task_id")
            if not tid:
                # A malformed entry should not abort the whole run.
                print(f"[WARN] Skipping task without task_id: {task}")
                continue
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                # Best effort: record the failure as the answer and keep going.
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)  # small pause between inference calls

        if not payload:
            return "❌ No valid questions to answer; nothing was submitted.", pd.DataFrame()

        # 4) Submit all answers in one request.
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
            "answers": payload,
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

        # 5) Build the status message from the scoring response.
        status = (
            "✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        # Top-level boundary for the UI callback: log the traceback and show
        # a readable message instead of raising into Gradio.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return f"❌ Unexpected error:\n{e}\n\nSee logs for details.", pd.DataFrame()

# --- Gradio UI ---------------------------------------------------------------
# One button runs the whole benchmark; its two return values fill the status
# Markdown and the results table.
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # Event wiring must happen inside the Blocks context.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df],
    )


if __name__ == "__main__":
    demo.launch()