| from dotenv import load_dotenv |
| from typing import TypedDict, List, Dict, Any, Optional, Annotated |
|
|
| from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings |
| from langchain_google_genai import ChatGoogleGenerativeAI |
| from langchain_groq import ChatGroq |
|
|
| from langgraph.graph import StateGraph, MessagesState, START, END |
| from langgraph.graph.message import add_messages |
| from langchain_core.messages import SystemMessage, HumanMessage, AnyMessage, AIMessage |
| from langchain_core.messages.ai import subtract_usage |
|
|
| from langchain.tools import Tool |
| from langchain_core.tools import tool |
| from langchain_community.tools.tavily_search import TavilySearchResults |
| from langchain_community.document_loaders import WikipediaLoader |
| from langchain_community.document_loaders import ArxivLoader |
| from langchain_community.retrievers import BM25Retriever |
|
|
| from langgraph.prebuilt import ToolNode, tools_condition |
|
|
| from prompts import system_prompt |
|
|
|
|
| |
# Load variables from a local .env file into the process environment
# (python-dotenv) as soon as this module is imported.
# NOTE(review): presumably this is where the API keys for the Tavily and
# Google clients used below come from — confirm against deployment config.
load_dotenv()
|
|
|
|
| |
def get_arxiv_url(content: str) -> str:
    """Extract an arXiv identifier from text and format it as an abs-page URL.

    Scans ``content`` line by line for the first line that, once stripped,
    starts with ``arXiv:``. The first whitespace-delimited token of that line
    is taken, the ``arXiv:`` prefix and any trailing version suffix
    (``v<digits>``) are removed, and the result is appended to
    ``https://arxiv.org/abs/``.

    Args:
        content: Text to scan, e.g. the extracted text of a paper.

    Returns:
        The URL for the first usable identifier found, or ``"unknown"`` when
        no line yields a non-empty identifier.
    """
    prefix = "arXiv:"
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if not line.startswith(prefix):
            continue

        # First token only: drops trailing text such as "[cs.LG] 1 Jan 2021".
        arxiv_id = line.split()[0][len(prefix):]

        # Remove only a genuine trailing version marker. Splitting on the
        # first "v" would truncate old-style ids whose archive name contains
        # a "v" (e.g. "solv-int/9701001v1" -> "sol").
        base, sep, version = arxiv_id.rpartition("v")
        if sep and version.isdigit():
            arxiv_id = base

        # A bare "arXiv:" token carries no id; keep scanning instead of
        # returning a URL with an empty path.
        if arxiv_id:
            return f"https://arxiv.org/abs/{arxiv_id}"
    return "unknown"
|
|
|
|
| |
@tool
def search_wiki(query: str) -> Dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: with @tool it doubles as the tool
    # description shown to the model.
    #
    # Returns a dict with the single key "wiki_results". Its value is the
    # loaded pages rendered as <Document ...> sections joined by "---"
    # separators, or a "Search error: ..." message when anything raises.
    print(f" executing search_wiki with query: {query}")
    try:
        pages = WikipediaLoader(query=query, load_max_docs=2).load()
        print(f"Found {len(pages)} documents for query '{query}'")

        sections = []
        for page in pages:
            source = page.metadata.get("source", "unknown")
            page_ref = page.metadata.get("page", "")
            sections.append(
                f'<Document source="{source}" page="{page_ref}"/>\n{page.page_content}\n</Document>'
            )
        wiki_text = "\n\n---\n\n".join(sections)

        if wiki_text == "":
            print("Empty search results")
        return {"wiki_results": wiki_text}
    except Exception as err:
        # Tool boundary: report the failure to the model rather than raising.
        print(f"Error in search_wiki: {err}")
        return {"wiki_results": f"Search error: {err}"}
|
|
|
|
| |
@tool
def search_web(query: str) -> Dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: with @tool it doubles as the tool
    # description shown to the model.
    #
    # Returns a dict with the single key "web_results". Its value is the hits
    # rendered as <Document ...> sections joined by "---" separators, or a
    # "Search error: ..." message when the search fails.
    print(f" executing search_web with query: {query}")

    try:
        search_docs = TavilySearchResults(max_results=3).run(query)
        print(f"DEBUG: search_docs type: {type(search_docs)}")
        print(f"DEBUG: search_docs content: {search_docs}")

        # TavilySearchResults catches its own request failures and hands back
        # the error text (a str) instead of a list of result dicts. Iterating
        # that string would raise AttributeError on `.get` and bury the real
        # cause, so surface it directly.
        if isinstance(search_docs, str):
            print(f"Error in search_web: {search_docs}")
            return {"web_results": f"Search error: {search_docs}"}

        print(f"Found {len(search_docs)} documents for query '{query}'")

        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.get("url", "unknown")}" page="{doc.get("page", "N/A")}"/>\n{doc.get("content", "")}\n</Document>'
            for doc in search_docs
        )
        if not formatted_search_docs:
            print("Empty search results")
        return {"web_results": formatted_search_docs}
    except Exception as e:
        # Tool boundary: report the failure to the model rather than raising.
        print(f"Error in search_web: {e}")
        return {"web_results": f"Search error: {str(e)}"}
|
|
|
|
| |
@tool
def search_arxiv(query: str) -> Dict[str, str]:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: with @tool it doubles as the tool
    # description shown to the model.
    #
    # Returns a dict with the single key "arxiv_results". Its value is the
    # loaded papers rendered as <Document ...> sections (first 1000 characters
    # of each paper's text) joined by "---" separators, or a
    # "Search error: ..." message when anything raises.
    print(f" executing search_arxiv with query: {query}")
    try:
        papers = ArxivLoader(query=query, load_max_docs=3).load()
        print(f"DEBUG: search_docs type: {type(papers)}")
        print(f"DEBUG: search_docs content: {papers}")
        print(f"Found {len(papers)} documents for query '{query}'")

        sections = []
        for paper in papers:
            # The source URL is recovered from the paper text itself.
            url = get_arxiv_url(paper.page_content)
            page_ref = paper.metadata.get("page", "N/A")
            excerpt = paper.page_content[:1000]
            sections.append(
                f'<Document source="{url}" page="{page_ref}"/>\n{excerpt}\n</Document>'
            )
        arxiv_text = "\n\n---\n\n".join(sections)

        if arxiv_text == "":
            print("Empty search results")
        return {"arxiv_results": arxiv_text}
    except Exception as err:
        # Tool boundary: report the failure to the model rather than raising.
        print(f"Error in search_arxiv: {err}")
        return {"arxiv_results": f"Search error: {err}"}
|
|
|
|
| |
| |
|
|
|
|
| |
# System message wrapping the prompt text imported from the project's
# `prompts` module.
sys_msg = SystemMessage(content=system_prompt)

# Search tools made available to the agent: build_graph() binds this list to
# the LLM and hands the same list to the graph's ToolNode.
tools = [search_web, search_wiki, search_arxiv]
|
|
|
|
| |
def build_graph(*, model: str = "gemini-2.5-flash-preview-04-17", temperature: float = 0):
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``assistant`` (one LLM turn with the module's
    search tools bound) and ``tools`` (a ``ToolNode`` over the same tools).
    Execution starts at ``assistant``; ``tools_condition`` then routes either
    to ``tools`` or to ``END``, and every tool run loops back to ``assistant``.

    Args:
        model: Gemini model name passed to ``ChatGoogleGenerativeAI``.
        temperature: Sampling temperature passed to ``ChatGoogleGenerativeAI``.

    Returns:
        The compiled graph. Invoke it with ``{"messages": [...]}``.
    """
    llm = ChatGoogleGenerativeAI(model=model, temperature=temperature)
    print(f"DEBUG: llm object = {llm}")

    llm_with_tools = llm.bind_tools(tools)
    print(f"DEBUG: llm_with_tools object = {llm_with_tools}")

    class AgentState(TypedDict):
        # Conversation history; add_messages is the reducer LangGraph uses to
        # merge each node's returned messages into this list.
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        """Run one LLM turn over the conversation so far."""
        history = list(state["messages"])

        # The module-level system prompt must actually reach the model.
        # Prepend it for this call only, unless the caller already supplied
        # a leading system message of their own.
        if not history or not isinstance(history[0], SystemMessage):
            history = [sys_msg] + history

        result = llm_with_tools.invoke(history)
        print(f"DEBUG: LLM result = {result}")

        if isinstance(result, AIMessage) and result.usage_metadata is None:
            # Backfill zeroed token counts when the provider reports none.
            # NOTE(review): presumably a workaround for downstream code that
            # assumes usage_metadata is always set — confirm.
            result.usage_metadata = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        return {"messages": [result]}

    builder = StateGraph(AgentState)

    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {
            # The model requested a tool call: execute it.
            "tools": "tools",
            # No tool call: the assistant's reply is final.
            END: END,
        },
    )
    # Feed tool output back to the model for the next turn.
    builder.add_edge("tools", "assistant")

    return builder.compile()
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: send one sample question through the agent graph and
    # pretty-print every message in the resulting state.
    question = "latest research on quantum computing"
    agent = build_graph()
    print(f"Running graph with question: {question}")
    final_state = agent.invoke({"messages": [HumanMessage(content=question)]})
    print("Graph execution finished. Messages:")
    for message in final_state["messages"]:
        message.pretty_print()
|
|