| import os |
| from dotenv import load_dotenv |
|
|
| |
| from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel |
|
|
| |
| from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool |
|
|
| |
| from tools import ( |
| AddDocumentToVectorStoreTool, |
| ArxivSearchTool, |
| DownloadFileFromLinkTool, |
| DuckDuckGoSearchTool, |
| QueryVectorStoreTool, |
| ReadFileContentTool, |
| TranscibeVideoFileTool, |
| TranscribeAudioTool, |
| VisitWebpageTool, |
| WikipediaSearchTool, |
| image_question_answering, |
| ) |
|
|
| |
| from utils import extract_final_answer, replace_tool_mentions |
|
|
|
|
class BoomBot:
    """Task-solving agent wrapping a smolagents ``CodeAgent``.

    The bot selects an LLM backend from ``provider``, builds a ``CodeAgent``
    with the project's tool set, and answers one task at a time via
    :meth:`run`.
    """

    # Base URL from which a task's attachment is downloaded (task id appended).
    FILES_BASE_URL = "https://agents-course-unit4-scoring.hf.space/files"

    # OpenAI-compatible endpoint shared by every DeepInfra-hosted model.
    DEEPINFRA_API_BASE = "https://api.deepinfra.com/v1/openai"

    # Provider name -> model name served through DeepInfra.
    # NOTE(review): "meta" resolves to a Qwen model rather than a Llama one,
    # and "google" is routed through DeepInfra — confirm both are intended.
    _DEEPINFRA_MODELS = {
        "deepinfra": "Qwen/Qwen3-235B-A22B",
        "meta": "Qwen/Qwen2.5-72B-Instruct",
        "google": "google/gemini-2.5-flash",
    }

    def __init__(self, provider="anthropic"):
        """
        Initialize the BoomBot with the specified provider.

        Loads environment variables from a ``.env`` file (API keys are read
        from the environment), then builds the model and the agent.

        Args:
            provider (str): The model provider to use. One of "qwen", "gemma",
                "anthropic", "deepinfra", "meta", "google" or "groq".

        Raises:
            ValueError: If ``provider`` is not one of the supported names.
        """
        load_dotenv()
        self.provider = provider
        self.model = self._initialize_model()
        self.agent = self._create_agent()

    def _build_deepinfra_model(self, model_name):
        """Return a ``LiteLLMModel`` for ``model_name`` served through DeepInfra."""
        return LiteLLMModel(
            model_id="deepinfra/" + model_name,
            api_base=self.DEEPINFRA_API_BASE,
            api_key=os.getenv("DEEPINFRA_API_KEY"),
            flatten_messages_as_text=True,
            max_tokens=8192,
            temperature=0.7,
        )

    def _initialize_model(self):
        """
        Initialize the appropriate model based on the provider.

        Returns:
            The initialized model object

        Raises:
            ValueError: If ``self.provider`` is not a supported provider.
        """
        provider = self.provider

        if provider == "qwen":
            return LiteLLMModel(
                model_id="ollama_chat/qwen3:8b",
                device="cuda",
                num_ctx=32768,
                temperature=0.6,
                top_p=0.95,
            )

        if provider == "gemma":
            return LiteLLMModel(
                model_id="ollama_chat/gemma3:12b-it-qat",
                num_ctx=65536,
                temperature=1.0,
                device="cuda",
                top_k=64,
                top_p=0.95,
                min_p=0.0,
            )

        if provider == "anthropic":
            return LiteLLMModel(
                model_id="anthropic/claude-3-5-haiku-latest",
                temperature=0.6,
                max_tokens=8192,
                api_key=os.getenv("ANTHROPIC_API_KEY"),
            )

        if provider in self._DEEPINFRA_MODELS:
            return self._build_deepinfra_model(self._DEEPINFRA_MODELS[provider])

        if provider == "groq":
            # NOTE(review): the "groq" provider is wired to a Claude model id
            # and passes no explicit API key — confirm this is intended.
            return LiteLLMModel(
                model_id="claude-3-opus-20240229",
                temperature=0.7,
                max_tokens=8192,
            )

        raise ValueError(f"Unsupported provider: {self.provider}")

    def _create_agent(self):
        """
        Create and configure the agent with all necessary tools.

        Returns:
            The configured CodeAgent
        """
        # Tools exposed to the agent, in the order they are registered.
        agent_tools = [
            DuckDuckGoSearchTool(),
            DownloadFileFromLinkTool(),
            ReadFileContentTool(),
            VisitWebpageTool(),
            TranscribeAudioTool(),
            WikipediaSearchTool(),
            ArxivSearchTool(),
            AddDocumentToVectorStoreTool(),
            QueryVectorStoreTool(),
            PythonInterpreterTool(),
            FinalAnswerTool(),
        ]

        # Modules the agent's generated code is allowed to import.
        additional_imports = [
            # General purpose / I/O
            "json",
            "os",
            "glob",
            "pathlib",
            "argparse",
            "pickle",
            "io",
            "re",
            "datetime",
            "collections",
            "math",
            "random",
            "csv",
            "zipfile",
            "itertools",
            "functools",
            "requests",
            "bs4",
            # Data handling
            "pandas",
            "numpy",
            "dask",
            "polars",
            "pyarrow",
            "h5py",
            "openpyxl",
            "yaml",
            # Plotting
            "matplotlib",
            "seaborn",
        ]

        agent = CodeAgent(
            tools=agent_tools,
            max_steps=15,
            model=self.model,
            add_base_tools=False,
            stream_outputs=True,
            additional_authorized_imports=additional_imports,
        )

        # Rewrite tool mentions in the default system prompt via the project
        # helper. NOTE(review): assumes ``agent.system_prompt`` is writable in
        # the installed smolagents version — confirm.
        agent.system_prompt = replace_tool_mentions(agent.system_prompt)

        return agent

    def _get_system_prompt(self):
        """
        Return the system prompt for the agent.

        The text is prepended to every task in :meth:`run`.

        Returns:
            str: The system prompt
        """
        return """
YOUR BEHAVIOR GUIDELINES:
• Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
• For math or puzzles: break the problem into code/math, then solve programmatically.

RESEARCH WORKFLOW:
1. SEARCH
- Begin with web_search, wikipedia_search, or arxiv_search.
- Refine your query if results are weak—don't just retry the same terms.
- If one search tool yields little, try another before moving on to downloads.

2. VISIT
- Use visit_webpage to preview content from promising links.
- If the content is long, complex, spans multiple pages, or may be needed later, do NOT rely solely on visit_webpage.
- Move quickly to downloading: avoid repeated visits when the content should be archived.

3. DOWNLOAD AND ADD TO VECTORSTORE (MANDATORY IF CONTENT IS LONG, DENSE, COMPLEX, MULTIPLE FILES OR LINKS TO VISIT)
- Use download_file_from_link on all valuable resources (including html pages or pdfs).
- Especially when a page is detailed, technical, or multi-part, downloading is preferred.
- You can (and should) download webpages as HTML. Do this whenever the site might be referenced again later.

4. INDEX & QUERY
- Immediately add downloaded files to the vector store using add_document_to_vector_store.
- For complex tasks or unclear answers, prefer querying vector store over re-visiting pages.
- If you've downloaded a file, **always index it unless clearly irrelevant.**

5. READ
- Use read_file_content to analyze file contents (html, pdf, text).
- You can also use query_downloaded_documents for deeper understanding.

6. EVALUATE
- ✅ If the answer is clear from current sources, respond.
- ❌ If not, continue iterating and analyzing downloaded material.

FALLBACK & ADAPTATION:
• If a tool fails, reformulate or switch tools.
• For arXiv: web_search might help you find the paper; follow with direct download of the PDF via download_file_from_link.

MANDATORY DOWNLOAD & INDEX WHEN:
• The page is lengthy or technical (e.g., research papers, government sites, legal docs, blog posts with code).
• You suspect you'll need to return to the content.
• You are working on multi-hop reasoning or long-term memory tasks.

COMMON TOOL CHAINS:
• FACTUAL Qs:
web_search → final_answer
• CURRENT EVENTS:
web_search → visit_webpage → (download + index if needed) → final_answer
• DOCUMENT-BASED Qs:
web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
• ARXIV PAPERS:
arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
• MEDIA ANALYSIS:
download_file_from_link → transcribe_audio → final_answer

FINAL ANSWER FORMAT:
- Begin with "FINAL ANSWER: "
- Number → digits only (e.g., 42)
- String → exact text (e.g., Pope Francis) without quotation marks
- List → comma-separated, no brackets unless specified (e.g., 2, 3, 4)
- End with: FINAL ANSWER: <your_answer>
"""

    def _build_prompt(self, question, task_id, to_download):
        """Assemble the full prompt: system prompt, task text, optional download notice."""
        parts = [
            self._get_system_prompt(),
            "\nHere is the Task you need to solve:\n\n",
            f"Task: {question}\n\n",
        ]

        if to_download:
            link = f"{self.FILES_BASE_URL}/{task_id}"
            parts.append(
                "IMPORTANT: Before solving the task, you must download a required file.\n"
                f"Use the `download_file_from_link` tool with this link: {link}\n"
                "After downloading, use the appropriate tool to read or process the file "
                "before attempting to solve the task.\n\n"
            )

        return "".join(parts)

    def run(self, question: str, task_id: str, to_download: bool) -> str:
        """
        Run the agent with the given question, task_id, and download flag.

        Args:
            question (str): The question or task for the agent to process
            task_id (str): A unique identifier for the task; also used to
                build the attachment download link
            to_download (bool): When true, the prompt instructs the agent to
                download the task's attachment before solving

        Returns:
            str: The agent's response after ``extract_final_answer`` has been
            applied to the raw agent output
        """
        prompt = self._build_prompt(question, task_id, to_download)
        result = self.agent.run(prompt)
        return extract_final_answer(result)
|
|
|
|
|
|
if __name__ == "__main__":
    # Evaluation driver: runs the agent over the question set and logs each
    # answer next to the reference answer in a CSV file.
    import csv

    import requests

    from utils import load_online_qas

    CSV_FILE = "evals/llm_eval.csv"
    FIELDNAMES = ["model", "task_id", "question", "llm_answer", "processed_answer", "real_answer"]
    QAS_FILE = r"../../Final_Assignment_Template/allqas.jsonl"
    FILES_BASE_URL = "https://agents-course-unit4-scoring.hf.space/files"
    NOT_ATTEMPTED = "NOT ATTEMPTED"
    # Questions containing any of these words are skipped, not sent to the agent.
    EXCLUDED_KEYWORDS = ("youtube", "video", "chess")
    # Upper bound (seconds) for the attachment availability probe.
    REQUEST_TIMEOUT_S = 30

    def ensure_csv():
        """Create the CSV file (and its directory) with header if it doesn't exist."""
        csv_dir = os.path.dirname(CSV_FILE)
        if csv_dir:
            os.makedirs(csv_dir, exist_ok=True)
        if not os.path.isfile(CSV_FILE):
            with open(CSV_FILE, mode="w", newline="", encoding="utf-8") as f:
                csv.DictWriter(f, fieldnames=FIELDNAMES).writeheader()

    def append_results(rows):
        """Append a list of dict rows to the CSV."""
        with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as f:
            csv.DictWriter(f, fieldnames=FIELDNAMES).writerows(rows)

    def attachment_available(task_id):
        """Return True when the task's attachment URL answers with HTTP 200."""
        link = f"{FILES_BASE_URL}/{task_id}"
        # stream=True: only the status line/headers are needed, so the body
        # is not downloaded; the timeout keeps a stalled server from hanging
        # the whole evaluation.
        with requests.get(link, timeout=REQUEST_TIMEOUT_S, stream=True) as resp:
            return resp.status_code == 200

    def evaluate_entry(entry, has_file):
        """Run the agent on one QA entry and return the CSV row for it.

        Args:
            entry: Mapping with "task_id", "Question", "Final answer" and,
                for file-based entries, optionally "file_name".
            has_file: True for entries from the file-based set; their
                attachment URL is probed before the agent is run.
        """
        task_id = entry["task_id"]
        question = entry["Question"]
        real_answer = entry["Final answer"]
        to_download = bool(entry.get("file_name", "")) if has_file else False

        if any(kw in question.lower() for kw in EXCLUDED_KEYWORDS):
            llm_answer = processed = NOT_ATTEMPTED
        else:
            # Top-level boundary: any failure is recorded in the row so one
            # bad task does not abort the whole evaluation.
            try:
                if has_file and not attachment_available(task_id):
                    llm_answer = processed = NOT_ATTEMPTED
                else:
                    llm_answer = agent.run(question, task_id, to_download)
                    processed = extract_final_answer(llm_answer).strip()
            except Exception as e:
                llm_answer = processed = f"[Error] {e}"

        return {
            "model": model_name,
            "task_id": task_id,
            "question": question,
            "llm_answer": llm_answer,
            "processed_answer": processed,
            "real_answer": real_answer,
        }

    # Fail fast on an unwritable output location, before any agent run.
    ensure_csv()

    agent = BoomBot(provider="deepinfra")
    model_name = agent.provider

    file_online = load_online_qas(file_path=QAS_FILE, has_file=True)
    nofile_online = load_online_qas(file_path=QAS_FILE, has_file=False)

    rows_written = 0
    for entries, has_file in ((file_online, True), (nofile_online, False)):
        for entry in entries:
            row = evaluate_entry(entry, has_file)
            # Persist each row immediately so an interrupted run keeps the
            # results gathered so far.
            append_results([row])
            rows_written += 1
            print("REAL ANSWER:", row["real_answer"])

    print(f"✅ Appended {rows_written} rows to {CSV_FILE}")