Final_Assignment_Template

Sleeping

App Files Files Community

ABVM committed on Jun 7, 2025

Commit

882238c

verified ·

1 Parent(s): a4dd17a

Upload 2 files

Browse files

Files changed (2) hide show

multi_agent.py +173 -0
vision_tool.py +70 -0

multi_agent.py ADDED Viewed

	@@ -0,0 +1,173 @@

+from smolagents import (
+    CodeAgent,
+    VisitWebpageTool,
+    WebSearchTool,
+    WikipediaSearchTool,
+    PythonInterpreterTool,
+    FinalAnswerTool,
+)
+from groq import Groq
+from vision_tool import image_reasoning_tool
+import os
+import time
+from types import SimpleNamespace
+# ---- TOOLS ----
+# ---- GROQ MODEL WRAPPER ----
+class GroqModel:
+    def __init__(self, model_name= str):
+        self.model_name = model_name
+        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+    def __call__(self, messages, max_tokens=8096):
+        params = {
+            "model": self.model_name,
+            "messages": messages,
+            "stream": False,
+            "max_completion_tokens": max_tokens,
+        }
+        for attempt in range(3):
+            try:
+                return self.client.chat.completions.create(**params)
+            except Exception as e:
+                msg = str(e).lower()
+                if "rate limit" in str(e).lower() and attempt < 2:
+                    time.sleep(10 * (attempt + 1))
+                else:
+                    raise
+    def generate(self, prompt, max_tokens=8096, **kwargs):
+        messages=prompt if not isinstance(prompt, str) else [
+            {"role":"user", "content": prompt}
+        ]
+        response = self._chat(messages, max_tokens, **extra)
+        return response.choices[0].message
+    def __call__(self, prompt, max_tokens=8_096, **extra):
+        return self.generate(prompt, max_tokens, **extra).content
+# ---- MULTI-AGENT SYSTEM ----
+class MultyAgentSystem:
+    def __init__(self):
+        self.primary_model_name = "deepseek-r1-distill-llama-70b"
+        self.fallback_model_name = "llama3-70b-8k"
+        self.deepseek_model = GroqModel(self.primary_model_name)
+        qwen_model = GroqModel("qwen-qwq-32b")
+        self.verification_limit = int(os.getenv("VERIFY_WORD_LIMIT", "75"))
+        # --- Web agent definition ---
+        self.web_agent = CodeAgent(
+            model=qwen_model,
+            tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()],
+            name="web_agent",
+            description=(
+                "You are a web browsing agent. Whenever the given {task} involves browsing "
+                "the web or a specific website such as Wikipedia or YouTube, you will use "
+                "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible."
+            ),
+            additional_authorized_imports=[
+                "markdownify",
+                "json",
+                "requests",
+                "urllib.request",
+                "urllib.parse",
+                "wikipedia-api",
+            ],
+            verbosity_level=0,
+            max_steps=10,
+        )
+        # --- Info agent definition ---
+        self.info_agent = CodeAgent(
+            model=qwen_model,
+            tools=[PythonInterpreterTool(), image_reasoning_tool],
+            name="info_agent",
+            description=(
+                "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
+                "You can also analyze images using a vision model. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
+                "For image or chess tasks, use pytesseract, PIL, chess, or the image_reasoning_tool as required."
+            ),
+            additional_authorized_imports=[
+                "numpy",
+                "math",
+                "pytesseract",
+                "PIL",
+                "chess",
+            ],
+        )
+        # --- Manager agent definition ---
+        manager_planning_interval = int(os.getenv("MANAGER_PLANNING_INTERVAL", "3"))
+        manager_max_steps = int(os.getenv("MANAGER_MAX_STEPS", "8"))
+        self.manager_agent = CodeAgent(
+            model=qwen_model,
+            tools=[FinalAnswerTool()],
+            managed_agents=[self.web_agent, self.info_agent],
+            name="manager_agent",
+            description=(
+                "You are the manager. Given a {task}, plan which agent to use: "
+                "If web data is needed, delegate to web_agent. If math, parsing, image reasoning, or code is needed, use info_agent. "
+                "After collecting outputs, optionally cross-validate and check correctness, then finalize and submit the best answer using FinalAnswerTool. "
+                "For each task, explicitly explain your planning steps and reasons for choosing which agent, and always prefer the most accurate and complete answer possible."
+            ),
+            additional_authorized_imports=[
+                "json",
+                "pandas",
+                "numpy",
+            ],
+            planning_interval=manager_planning_interval,
+            verbosity_level=2,
+            max_steps=manager_max_steps,
+        )
+        # runtime tracking for fallback switching
+        self.total_runtime = 0.0
+        self.first_call_duration = None
+        self.model_switched = False
+    def _switch_to_fallback(self):
+        if self.model_switched:
+            return
+        self.manager_agent.model = GroqModel(self.fallback_model_name)
+        self.model_switched = True
+    def run(self, question, high_stakes: bool = False, **kwargs):
+        start_time = time.time()
+        print("Generating initial answer with Qwen-32B")
+        initial_answer = self.manager_agent(question, **kwargs)
+        call_duration = time.time() - start_time
+        answer = initial_answer
+        if high_stakes or len(initial_answer.split()) > self.verification_limit:
+            print("Verifying answer using DeepSeek-70B")
+            verification_prompt = (
+                "Review the following answer for accuracy and rewrite if needed:"
+                f"\n\n{initial_answer}"
+            )
+            try:
+                answer = self.deepseek_model(verification_prompt)
+            except Exception as e:
+                print(f"Verification failed: {e}. Using initial answer.")
+                answer = initial_answer
+        if self.first_call_duration is None:
+            self.first_call_duration = call_duration
+            if self.first_call_duration > 30:
+                self._switch_to_fallback()
+        self.total_runtime += call_duration
+        if self.total_runtime > 300 and not self.model_switched:
+            self._switch_to_fallback()
+        return answer
+    def __call__(self, question, high_stakes: bool = False, **kwargs):
+        return self.run(question, high_stakes=high_stakes, **kwargs)

vision_tool.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# Vision tool using Groq's Meta-Llama Scout model
+from smolagents import tool
+from groq import Groq
+import os
+def _llama_analyze(image_b64: str, prompt: str) -> str:
+    """Internal helper to query the Llama vision model."""
+    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
+            ],
+        }
+    ]
+    response = client.chat.completions.create(
+        model="meta-llama/llama-4-scout-17b-16e-instruct",
+        messages=messages,
+        stream=False,
+        max_completion_tokens=512,
+    )
+    return response.choices[0].message.content
+@tool
+def image_reasoning_tool(image_file: str, prompt: str | None = None) -> dict:
+    """Perform OCR and optional vision analysis on an image.
+    This single entry point unifies OCR extraction and Llama vision reasoning so
+    the planner only sees one image tool.
+    Args:
+        image_file: Path to the image file to analyze.
+        prompt: Optional instruction for the vision model. If omitted, only OCR
+            is performed.
+    Returns:
+        Dictionary with OCR text, base64 image data and optional vision model
+        response.
+    """
+    try:
+        from PIL import Image
+        from smolagents.utils import encode_image_base64
+        import pytesseract
+        image = Image.open(image_file)
+        b64 = encode_image_base64(image)
+        ocr_text = pytesseract.image_to_string(image)
+        vision_text = ""
+        if prompt:
+            try:
+                vision_text = _llama_analyze(b64, prompt)
+            except Exception as e:  # vision errors shouldn't break OCR result
+                vision_text = f"Error processing image with vision model: {e}"
+        return {"ocr_text": ocr_text, "vision_text": vision_text, "base64_image": b64}
+    except Exception as e:
+        return {
+            "ocr_text": "",
+            "vision_text": "",
+            "base64_image": "",
+            "error": str(e),
+        }