mabelwang21 commited on
Commit
97c9b88
·
1 Parent(s): f3a76b7

init test simple agent

Browse files
Files changed (7) hide show
  1. README.md +2 -0
  2. agent.py +118 -0
  3. app.py +198 -0
  4. metadata.jsonl +0 -0
  5. requirements.txt +10 -0
  6. tool_counts.csv +94 -0
  7. view_jsonfile.ipynb +381 -0
README.md CHANGED
@@ -7,6 +7,8 @@ sdk: gradio
7
  sdk_version: 5.27.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  sdk_version: 5.27.0
8
  app_file: app.py
9
  pinned: false
10
+ hf_oauth: true
11
+ hf_oauth_expiration_minutes: 480
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agent.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import ToolCallingAgent, tool
2
+ from langchain_community.tools import DuckDuckGoSearchRun
3
+ from langchain_community.utilities import WikipediaAPIWrapper
4
+ from langchain.tools import BaseTool
5
+ from PIL import Image
6
+ import pytesseract
7
+ import fitz
8
+ import ast
9
+ import os
10
+
11
+ # -------------------- TOOL DEFINITIONS --------------------
12
+
13
+
14
+ @tool
15
+ def web_search(query: str) -> str:
16
+ """
17
+ Search the web using DuckDuckGo.
18
+
19
+ Args:
20
+ query (str): The search query string.
21
+
22
+ Returns:
23
+ str: Summary of search results.
24
+ """
25
+ search = DuckDuckGoSearchRun()
26
+ return search.run(query)
27
+
28
@tool
def wikipedia_search(query: str) -> str:
    """
    Look up a topic on Wikipedia and return relevant content.

    Args:
        query (str): The topic or term to search on Wikipedia.

    Returns:
        str: Extracted Wikipedia content.
    """
    # Bug fix: the original called WikipediaQueryRun(), a name that is never
    # imported in this module (only WikipediaAPIWrapper is), so every call
    # raised NameError. Use the imported WikipediaAPIWrapper instead.
    wiki = WikipediaAPIWrapper()
    return wiki.run(query)
41
+
42
@tool
def image_recognition(image_path: str) -> str:
    """
    Perform OCR on an image to extract text.

    Args:
        image_path (str): Path to the image file.

    Returns:
        str: Extracted text from the image.
    """
    picture = Image.open(image_path)
    text = pytesseract.image_to_string(picture)
    return text
55
+
56
@tool
def read_pdf(pdf_path: str) -> str:
    """
    Extract all text from a PDF document.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Text content of the PDF (pages concatenated in order).
    """
    document = fitz.open(pdf_path)
    page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
69
+
70
@tool
def calculate(expr: str) -> float:
    """
    Safely evaluate a simple arithmetic expression.

    Supports +, -, *, / and ** plus unary +/- on numeric literals. Walks the
    AST instead of calling eval(), so arbitrary code in *expr* cannot run.

    Args:
        expr (str): The math expression to evaluate, e.g. "2 * (3 + 4)".

    Returns:
        float: Result of the expression.

    Raises:
        TypeError: If the expression contains an unsupported construct
            (names, calls, unsupported operators, non-numeric constants).
        SyntaxError: If *expr* is not valid Python expression syntax.
    """
    def _eval(node):
        if isinstance(node, ast.BinOp):
            left = _eval(node.left)
            right = _eval(node.right)
            if isinstance(node.op, ast.Add): return left + right
            if isinstance(node.op, ast.Sub): return left - right
            if isinstance(node.op, ast.Mult): return left * right
            if isinstance(node.op, ast.Div): return left / right
            if isinstance(node.op, ast.Pow): return left ** right
            # Bug fix: the original fell through and implicitly returned None
            # for operators like % or //; fail loudly instead.
            raise TypeError(f"Unsupported operator: {type(node.op).__name__}")
        elif isinstance(node, ast.UnaryOp):
            operand = _eval(node.operand)
            if isinstance(node.op, ast.UAdd): return +operand
            if isinstance(node.op, ast.USub): return -operand
            raise TypeError(f"Unsupported unary operator: {type(node.op).__name__}")
        elif isinstance(node, ast.Constant):
            # ast.Constant replaces ast.Num (deprecated since Python 3.8).
            # Exclude bool explicitly: True/False are int subclasses.
            if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
                return node.value
            raise TypeError(f"Unsupported constant: {node.value!r}")
        else:
            raise TypeError(f"Unsupported type: {node}")
    parsed = ast.parse(expr, mode='eval').body
    return _eval(parsed)
100
+
101
# -------------------- AGENT CLASS --------------------

# Tool registry handed to the ToolCallingAgent below.
tools = [web_search, wikipedia_search, image_recognition, read_pdf, calculate]

class MyAgent:
    """Thin callable wrapper around a smolagents ToolCallingAgent.

    Calling an instance with a question string returns a single-line
    "FINAL ANSWER: ..." string, never raising to the caller.
    """

    def __init__(self):
        # Local import keeps module import side-effect free if HfApiModel
        # pulls in heavy dependencies.
        from smolagents import HfApiModel
        self.agent = ToolCallingAgent(
            tools=tools,
            model=HfApiModel("openai/gpt-3.5-turbo")  # or another supported model
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return a formatted answer string."""
        try:
            result = self.agent.run(question)
            # Robustness fix: agent.run() may return a non-str result object;
            # coerce to str before stripping so .strip() cannot AttributeError.
            return f"FINAL ANSWER: {str(result).strip()}"
        except Exception as e:
            # Never propagate: the caller expects a printable answer string.
            return f"FINAL ANSWER: ERROR - {e}"
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ from agent import MyAgent
7
+
8
+ # (Keep Constants as is)
9
+ # --- Constants ---
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+
12
+ # --- Basic Agent Definition ---
13
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
14
class BasicAgent:
    """Trivial stand-in agent: ignores the question and returns a canned reply."""

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        # Log a truncated preview of the incoming question.
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = "This is a default answer."
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer
22
+
23
def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, injected
            by Gradio's LoginButton; None when nobody is logged in.

    Returns:
        tuple: (status_message: str, results_table: pandas.DataFrame or None).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    # Login is required because the scoring server keys submissions by username.
    if profile:
        username= f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = MyAgent()

    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        # response is guaranteed bound here: JSONDecodeError can only come
        # from response.json() above.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []       # one row per question, shown in the UI table
    answers_payload = []   # what actually gets POSTed to the scoring server
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing question must not abort the whole run; record and continue.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # Non-2xx response: try to surface the server-provided detail.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
143
+
144
+
145
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # LoginButton makes a gr.OAuthProfile implicitly available; Gradio injects
    # it into run_and_submit_all's `profile` parameter on click, which is why
    # the click handler below declares no `inputs`.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
175
+
176
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    # (both are set by the Hugging Face Spaces runtime; absent locally).
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
metadata.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ smolagents
6
+ duckduckgo-search
7
+ wikipedia
8
+ pytesseract
9
+ Pillow
10
+ pymupdf
tool_counts.csv ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Tool,Count
2
+ Web browser,95
3
+ Search engine,88
4
+ Calculator,30
5
+ Image recognition tools,11
6
+ search engine,9
7
+ None,9
8
+ web browser,8
9
+ A search engine,8
10
+ A web browser,8
11
+ PDF viewer,7
12
+ PDF access,7
13
+ Microsoft Excel,5
14
+ Image recognition,5
15
+ A calculator,4
16
+ calculator,4
17
+ OCR,4
18
+ Web Browser,3
19
+ Excel,3
20
+ Color recognition,3
21
+ Access to Wikipedia,3
22
+ Excel file access,3
23
+ Python,3
24
+ A file interface,3
25
+ Microsoft Excel / Google Sheets,3
26
+ A web browser.,2
27
+ Search Engine,2
28
+ A search engine.,2
29
+ File handling,2
30
+ Video recognition tools,2
31
+ Audio capability,2
32
+ A speech-to-text tool,2
33
+ image recognition/OCR,2
34
+ Word document access,1
35
+ Counter,1
36
+ A word reversal tool / script,1
37
+ Tool to extract text from images,1
38
+ Python compiler,1
39
+ Image recognition tools (to identify and parse a figure with three axes),1
40
+ Unlambda compiler (optional),1
41
+ A calculator.,1
42
+ google search,1
43
+ video recognition tools,1
44
+ JSONLD file access,1
45
+ Video parsing,1
46
+ (Optional) Web browser,1
47
+ code/data analysis tools,1
48
+ Text processing/diff tool,1
49
+ GIF parsing tools,1
50
+ "Access to the Internet Archive, web.archive.org",1
51
+ XML file access,1
52
+ a calculator,1
53
+ PDF reader,1
54
+ Markdown,1
55
+ Google Translate access,1
56
+ Bass note data,1
57
+ Text Editor,1
58
+ XLSX file access,1
59
+ PowerPoint viewer,1
60
+ CSV file access,1
61
+ Calculator (or use Excel),1
62
+ (Optional) Search engine,1
63
+ computer algebra system,1
64
+ Video processing software,1
65
+ Computer vision,1
66
+ Audio processing software,1
67
+ Google Maps,1
68
+ Access to Excel files,1
69
+ Calculator (or ability to count),1
70
+ A Python IDE,1
71
+ Spreadsheet editor,1
72
+ No tools required,1
73
+ Image recognition and processing tools,1
74
+ Computer vision or OCR,1
75
+ C++ compiler,1
76
+ Access to Google Maps,1
77
+ Image recognition/OCR,1
78
+ YouTube player,1
79
+ Natural language processor,1
80
+ Graph interaction tools,1
81
+ Bablyonian cuniform -> arabic legend,1
82
+ Access to YouTube,1
83
+ image recognition tools,1
84
+ image search tools,1
85
+ Calculator or counting function,1
86
+ A speech-to-text audio processing tool,1
87
+ Access to academic journal websites,1
88
+ pdf reader/extracter,1
89
+ Rubik's cube model,1
90
+ Wikipedia,1
91
+ Video capability,1
92
+ Image processing tools,1
93
+ Image recognition software,1
94
+ YouTube,1
view_jsonfile.ipynb ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "d0191263",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "#load in metadata.jsonl file to view it\n",
11
+ "import json\n",
12
+ "import jsonlines\n",
13
+ "import pandas as pd\n",
14
+ "import os\n",
15
+ "import numpy as np\n",
16
+ "\n",
17
+ "# Load the metadata.jsonl file\n",
18
+ "with jsonlines.open('metadata.jsonl') as reader:\n",
19
+ " metadata = [obj for obj in reader]"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 5,
25
+ "id": "6820d40d",
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "data": {
30
+ "text/html": [
31
+ "<div>\n",
32
+ "<style scoped>\n",
33
+ " .dataframe tbody tr th:only-of-type {\n",
34
+ " vertical-align: middle;\n",
35
+ " }\n",
36
+ "\n",
37
+ " .dataframe tbody tr th {\n",
38
+ " vertical-align: top;\n",
39
+ " }\n",
40
+ "\n",
41
+ " .dataframe thead th {\n",
42
+ " text-align: right;\n",
43
+ " }\n",
44
+ "</style>\n",
45
+ "<table border=\"1\" class=\"dataframe\">\n",
46
+ " <thead>\n",
47
+ " <tr style=\"text-align: right;\">\n",
48
+ " <th></th>\n",
49
+ " <th>task_id</th>\n",
50
+ " <th>Question</th>\n",
51
+ " <th>Level</th>\n",
52
+ " <th>Final answer</th>\n",
53
+ " <th>file_name</th>\n",
54
+ " <th>Annotator Metadata</th>\n",
55
+ " </tr>\n",
56
+ " </thead>\n",
57
+ " <tbody>\n",
58
+ " <tr>\n",
59
+ " <th>0</th>\n",
60
+ " <td>c61d22de-5f6c-4958-a7f6-5e9707bd3466</td>\n",
61
+ " <td>A paper about AI regulation that was originall...</td>\n",
62
+ " <td>2</td>\n",
63
+ " <td>egalitarian</td>\n",
64
+ " <td></td>\n",
65
+ " <td>{'Steps': '1. Go to arxiv.org and navigate to ...</td>\n",
66
+ " </tr>\n",
67
+ " <tr>\n",
68
+ " <th>1</th>\n",
69
+ " <td>17b5a6a3-bc87-42e8-b0fb-6ab0781ef2cc</td>\n",
70
+ " <td>I’m researching species that became invasive a...</td>\n",
71
+ " <td>2</td>\n",
72
+ " <td>34689</td>\n",
73
+ " <td></td>\n",
74
+ " <td>{'Steps': '1. Search the web for “finding nemo...</td>\n",
75
+ " </tr>\n",
76
+ " <tr>\n",
77
+ " <th>2</th>\n",
78
+ " <td>04a04a9b-226c-43fd-b319-d5e89743676f</td>\n",
79
+ " <td>If we assume all articles published by Nature ...</td>\n",
80
+ " <td>2</td>\n",
81
+ " <td>41</td>\n",
82
+ " <td></td>\n",
83
+ " <td>{'Steps': '1. Find how many articles were publ...</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>3</th>\n",
87
+ " <td>14569e28-c88c-43e4-8c32-097d35b9a67d</td>\n",
88
+ " <td>In Unlambda, what exact charcter or text needs...</td>\n",
89
+ " <td>2</td>\n",
90
+ " <td>backtick</td>\n",
91
+ " <td></td>\n",
92
+ " <td>{'Steps': '1. Searched \"Unlambda syntax\" onlin...</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>4</th>\n",
96
+ " <td>e1fc63a2-da7a-432f-be78-7c4a95598703</td>\n",
97
+ " <td>If Eliud Kipchoge could maintain his record-ma...</td>\n",
98
+ " <td>1</td>\n",
99
+ " <td>17</td>\n",
100
+ " <td></td>\n",
101
+ " <td>{'Steps': '1. Googled Eliud Kipchoge marathon ...</td>\n",
102
+ " </tr>\n",
103
+ " </tbody>\n",
104
+ "</table>\n",
105
+ "</div>"
106
+ ],
107
+ "text/plain": [
108
+ " task_id \\\n",
109
+ "0 c61d22de-5f6c-4958-a7f6-5e9707bd3466 \n",
110
+ "1 17b5a6a3-bc87-42e8-b0fb-6ab0781ef2cc \n",
111
+ "2 04a04a9b-226c-43fd-b319-d5e89743676f \n",
112
+ "3 14569e28-c88c-43e4-8c32-097d35b9a67d \n",
113
+ "4 e1fc63a2-da7a-432f-be78-7c4a95598703 \n",
114
+ "\n",
115
+ " Question Level Final answer \\\n",
116
+ "0 A paper about AI regulation that was originall... 2 egalitarian \n",
117
+ "1 I’m researching species that became invasive a... 2 34689 \n",
118
+ "2 If we assume all articles published by Nature ... 2 41 \n",
119
+ "3 In Unlambda, what exact charcter or text needs... 2 backtick \n",
120
+ "4 If Eliud Kipchoge could maintain his record-ma... 1 17 \n",
121
+ "\n",
122
+ " file_name Annotator Metadata \n",
123
+ "0 {'Steps': '1. Go to arxiv.org and navigate to ... \n",
124
+ "1 {'Steps': '1. Search the web for “finding nemo... \n",
125
+ "2 {'Steps': '1. Find how many articles were publ... \n",
126
+ "3 {'Steps': '1. Searched \"Unlambda syntax\" onlin... \n",
127
+ "4 {'Steps': '1. Googled Eliud Kipchoge marathon ... "
128
+ ]
129
+ },
130
+ "execution_count": 5,
131
+ "metadata": {},
132
+ "output_type": "execute_result"
133
+ }
134
+ ],
135
+ "source": [
136
+ "# Convert the list of dictionaries to a DataFrame\n",
137
+ "df = pd.DataFrame(metadata)\n",
138
+ "df.head()"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 54,
144
+ "id": "9b7659b2",
145
+ "metadata": {},
146
+ "outputs": [
147
+ {
148
+ "data": {
149
+ "text/plain": [
150
+ "{'Steps': '1. Search the web for “finding nemo main character”.\\n2. Note the results, which state that the main character is a clownfish.\\n3. Search the web for “usgs nonnative species database”.\\n4. Click result for the Nonindigenous Aquatic Species site.\\n5. Click “Marine Fishes”.\\n6. Click “Species List of Nonindigenous Marine Fish”.\\n7. Scroll through the list until I find the clown anenomefish, and click “Collection info”.\\n8. Note the place that a clown anenomefish was found, in Fred Howard Park at the Gulf of Mexico.\\n9. Search the web for “fred howard park florida zip code”.\\n10. Note the zip code, 34689. Since only one clownfish was found before the year 2020, this is the answer.',\n",
151
+ " 'Number of steps': '10',\n",
152
+ " 'How long did this take?': '5 minutes',\n",
153
+ " 'Tools': '1. Search engine\\n2. Web browser',\n",
154
+ " 'Number of tools': '2'}"
155
+ ]
156
+ },
157
+ "execution_count": 54,
158
+ "metadata": {},
159
+ "output_type": "execute_result"
160
+ }
161
+ ],
162
+ "source": [
163
+ "#convert metadata into dataframe and view the annotator column\n",
164
+ "df = pd.DataFrame(metadata)\n",
165
+ "df.head()\n",
166
+ "df['Annotator Metadata'][1]"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "code",
171
+ "execution_count": null,
172
+ "id": "54bf634a",
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "# Extract all tools from Annotator Metadata\n",
177
+ "all_tools = []\n",
178
+ "for entry in metadata:\n",
179
+ " tools = entry['Annotator Metadata']['Tools'].split('\\n')\n",
180
+ " # Remove numbering and leading/trailing whitespace from each tool\n",
181
+ " tools = [t.split('. ')[-1].strip() for t in tools]\n",
182
+ " all_tools.extend(tools)\n",
183
+ "\n",
184
+ "# Count unique tools\n",
185
+ "tool_counts = pd.Series(all_tools).value_counts()\n",
186
+ "#print out the top 10 tools with counts greater than 5 as a DataFrame\n",
187
+ "tool_counts_df = tool_counts.reset_index()\n",
188
+ "tool_counts_df.columns = ['Tool', 'Count']\n",
189
+ "tool_counts_df = tool_counts_df.sort_values(by='Count', ascending=False)\n",
190
+ "# print out unique tool names\n",
191
+ "# Save the tool counts to a CSV file\n",
192
+ "tool_counts_df.to_csv('tool_counts.csv', index=False)"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "id": "1f0a65e7",
199
+ "metadata": {},
200
+ "outputs": [
201
+ {
202
+ "data": {
203
+ "text/plain": [
204
+ "['Web browser',\n",
205
+ " 'Search engine',\n",
206
+ " 'Calculator',\n",
207
+ " 'Image recognition tools',\n",
208
+ " 'search engine',\n",
209
+ " 'None',\n",
210
+ " 'web browser',\n",
211
+ " 'A search engine',\n",
212
+ " 'A web browser',\n",
213
+ " 'PDF viewer',\n",
214
+ " 'PDF access']"
215
+ ]
216
+ },
217
+ "execution_count": 37,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "tool_counts_df.Tool[tool_counts_df.Count > 5].values.tolist()"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 48,
229
+ "id": "3674e568",
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "text/plain": [
235
+ "True"
236
+ ]
237
+ },
238
+ "execution_count": 48,
239
+ "metadata": {},
240
+ "output_type": "execute_result"
241
+ }
242
+ ],
243
+ "source": [
244
+ "import os\n",
245
+ "import requests\n",
246
+ "import json\n",
247
+ "import base64\n",
248
+ " \n",
249
+ "import numpy as np\n",
250
+ "from smolagents import CodeAgent, HfApiModel, Tool\n",
251
+ "from langchain.agents import load_tools\n",
252
+ "\n",
253
+ "#load env variables\n",
254
+ "from dotenv import load_dotenv\n",
255
+ "load_dotenv()\n"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 52,
261
+ "id": "00f79e78",
262
+ "metadata": {},
263
+ "outputs": [
264
+ {
265
+ "ename": "PydanticUserError",
266
+ "evalue": "Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden",
267
+ "output_type": "error",
268
+ "traceback": [
269
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
270
+ "\u001b[1;31mPydanticUserError\u001b[0m Traceback (most recent call last)",
271
+ "Cell \u001b[1;32mIn[52], line 38\u001b[0m\n\u001b[0;32m 31\u001b[0m wikipedia_tool \u001b[38;5;241m=\u001b[39m Tool(\n\u001b[0;32m 32\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWikipedia\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 33\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearch Wikipedia articles for information\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 34\u001b[0m func\u001b[38;5;241m=\u001b[39mwikipedia\u001b[38;5;241m.\u001b[39mrun\n\u001b[0;32m 35\u001b[0m )\n\u001b[0;32m 37\u001b[0m \u001b[38;5;66;03m# Basic Calculator Tool (free)\u001b[39;00m\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mCalculatorTool\u001b[39;00m(BaseTool):\n\u001b[0;32m 39\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCalculator\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 40\u001b[0m description \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUseful for performing mathematical calculations\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
272
+ "File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:112\u001b[0m, in \u001b[0;36mModelMetaclass.__new__\u001b[1;34m(mcs, cls_name, bases, namespace, __pydantic_generic_metadata__, __pydantic_reset_parent_namespace__, _create_model_module, **kwargs)\u001b[0m\n\u001b[0;32m 110\u001b[0m config_wrapper \u001b[38;5;241m=\u001b[39m ConfigWrapper\u001b[38;5;241m.\u001b[39mfor_model(bases, namespace, kwargs)\n\u001b[0;32m 111\u001b[0m namespace[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_config\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m config_wrapper\u001b[38;5;241m.\u001b[39mconfig_dict\n\u001b[1;32m--> 112\u001b[0m private_attributes \u001b[38;5;241m=\u001b[39m \u001b[43minspect_namespace\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig_wrapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignored_types\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclass_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbase_field_names\u001b[49m\n\u001b[0;32m 114\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m private_attributes \u001b[38;5;129;01mor\u001b[39;00m base_private_attributes:\n\u001b[0;32m 116\u001b[0m original_model_post_init \u001b[38;5;241m=\u001b[39m get_model_post_init(namespace, bases)\n",
273
+ "File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:449\u001b[0m, in \u001b[0;36minspect_namespace\u001b[1;34m(namespace, ignored_types, base_class_vars, base_class_fields)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m var_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m raw_annotations:\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m var_name \u001b[38;5;129;01min\u001b[39;00m base_class_fields:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 450\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m defined on a base class was overridden by a non-annotated attribute. \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll field definitions, including overrides, require a type annotation.\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 452\u001b[0m code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-overridden\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 453\u001b[0m )\n\u001b[0;32m 454\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, FieldInfo):\n\u001b[0;32m 455\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m requires a type annotation\u001b[39m\u001b[38;5;124m'\u001b[39m, code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-missing-annotation\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 457\u001b[0m )\n",
274
+ "\u001b[1;31mPydanticUserError\u001b[0m: Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden"
275
+ ]
276
+ }
277
+ ],
278
+ "source": [
279
+ "# build tools for the agent with the following functions: websearch,calculator,Image recognition tools, image viewer, pdf viewer, pdf accesser\n",
280
+ "# use langchain and other libraries to build the tools\n",
281
+ "\n",
282
+ "from smolagents import CodeAgent, HfApiModel, Tool\n",
283
+ "from langchain.agents import load_tools\n",
284
+ "from langchain.tools import Tool as LangchainTool\n",
285
+ "\n",
286
+ "# Import required libraries\n",
287
+ "from langchain.tools import DuckDuckGoSearchRun\n",
288
+ "from langchain.utilities import WikipediaAPIWrapper\n",
289
+ "from langchain.tools import BaseTool\n",
290
+ "from langchain.callbacks.manager import CallbackManagerForToolRun\n",
291
+ "from typing import Optional, Type\n",
292
+ "from langchain.tools import ShellTool\n",
293
+ "from PIL import Image\n",
294
+ "import pytesseract\n",
295
+ "import requests\n",
296
+ "from io import BytesIO\n",
297
+ "import fitz # PyMuPDF for PDF handling\n",
298
+ "\n",
299
+ "# Web Search Tool using DuckDuckGo (free)\n",
300
+ "search = DuckDuckGoSearchRun()\n",
301
+ "web_search_tool = Tool(\n",
302
+ " name=\"Web Search\",\n",
303
+ " description=\"Search the web for current information using DuckDuckGo\",\n",
304
+ " func=search.run\n",
305
+ ")\n",
306
+ "\n",
307
+ "# Wikipedia Tool (free)\n",
308
+ "wikipedia = WikipediaAPIWrapper()\n",
309
+ "wikipedia_tool = Tool(\n",
310
+ " name=\"Wikipedia\",\n",
311
+ " description=\"Search Wikipedia articles for information\",\n",
312
+ " func=wikipedia.run\n",
313
+ ")\n",
314
+ "\n",
315
+ "# Basic Calculator Tool (free)\n",
316
+ "class CalculatorTool(BaseTool):\n",
317
+ " name = \"Calculator\"\n",
318
+ " description = \"Useful for performing mathematical calculations\"\n",
319
+ " \n",
320
+ " def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
321
+ " client = wolframalpha.Client(WOLFRAM_ALPHA_APPID)\n",
322
+ " res = client.query(query)\n",
323
+ " return next(res.results).text\n",
324
+ "\n",
325
+ "# Image Recognition Tool (using local Tesseract OCR)\n",
326
+ "class ImageRecognitionTool(BaseTool):\n",
327
+ " name = \"Image Recognition\"\n",
328
+ " description = \"Analyze and extract text from images using OCR\"\n",
329
+ " \n",
330
+ " def _run(self, image_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
331
+ " try:\n",
332
+ " img = Image.open(image_path)\n",
333
+ " text = pytesseract.image_to_string(img)\n",
334
+ " return text\n",
335
+ " except Exception as e:\n",
336
+ " return f\"Error processing image: {str(e)}\"\n",
337
+ "\n",
338
+ "# PDF Reader Tool\n",
339
+ "class PDFReaderTool(BaseTool):\n",
340
+ " name = \"PDF Reader\"\n",
341
+ " description = \"Read and extract text from PDF documents\"\n",
342
+ " \n",
343
+ " def _run(self, pdf_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
344
+ " try:\n",
345
+ " doc = fitz.open(pdf_path)\n",
346
+ " text = \"\"\n",
347
+ " for page in doc:\n",
348
+ " text += page.get_text()\n",
349
+ " return text\n",
350
+ " except Exception as e:\n",
351
+ " return f\"Error reading PDF: {str(e)}\"\n",
352
+ "\n",
353
+ "\n",
354
+ "\n",
355
+ "# Test the agent (uncomment to run)\n",
356
+ "# response = agent.run(\"Calculate 234 * 789 and then search for information about the result\")\n"
357
+ ]
358
+ }
359
+ ],
360
+ "metadata": {
361
+ "kernelspec": {
362
+ "display_name": "hf_agent",
363
+ "language": "python",
364
+ "name": "python3"
365
+ },
366
+ "language_info": {
367
+ "codemirror_mode": {
368
+ "name": "ipython",
369
+ "version": 3
370
+ },
371
+ "file_extension": ".py",
372
+ "mimetype": "text/x-python",
373
+ "name": "python",
374
+ "nbconvert_exporter": "python",
375
+ "pygments_lexer": "ipython3",
376
+ "version": "3.10.17"
377
+ }
378
+ },
379
+ "nbformat": 4,
380
+ "nbformat_minor": 5
381
+ }