| import wikipedia |
| from .search_base import SearchBase |
| from .tool import Tool,Toolkit |
| from typing import Dict, Any, Optional, List |
| from pydantic import Field |
| from ..core.logging import logger |
|
|
|
|
class SearchWiki(SearchBase):
    """Wikipedia search backend built on the ``wikipedia`` package.

    Runs a title search, then loads each matching page and returns its
    title, summary, (optionally truncated) content and URL.
    """

    max_summary_sentences: Optional[int] = Field(default=None, description="Maximum number of sentences in the summary. Default None means return all available content.")

    def __init__(
        self,
        name: str = 'SearchWiki',
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the Wikipedia Search tool.

        Args:
            name (str): The name of the search tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int, optional): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int, optional): Maximum number of sentences in the summary, None means no limit
            **kwargs: Additional data to pass to the parent class
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

    def search(
        self,
        query: str,
        num_search_pages: Optional[int] = None,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Search Wikipedia for ``query`` and collect one entry per loadable page.

        Args:
            query (str): The search query.
            num_search_pages (int, optional): Number of search results to retrieve.
                A falsy value (None or 0) falls back to ``self.num_search_pages``.
            max_content_words (int, optional): Maximum number of words to include in
                content. A falsy value falls back to ``self.max_content_words``.
            max_summary_sentences (int, optional): Maximum number of sentences in the
                summary. A falsy value falls back to ``self.max_summary_sentences``;
                if the effective value is None or not positive, no sentence limit
                is requested.

        Returns:
            dict: ``{"results": [...], "error": ...}``. Each result is a dict with
            the keys ``title``, ``summary``, ``content`` and ``url``. ``error`` is
            None on success, otherwise a message string (``results`` is then empty).
            Titles that raise ``DisambiguationError`` or ``PageError`` are skipped,
            so ``results`` may hold fewer entries than were requested.
        """
        # Per-call overrides win; falsy values defer to the instance configuration.
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words
        max_summary_sentences = max_summary_sentences or self.max_summary_sentences

        # The summary lookup must resolve the title exactly like the page lookup
        # below (auto_suggest=False); otherwise the summary can come from a
        # different article than the one providing title/content/url.
        summary_kwargs: Dict[str, Any] = {"auto_suggest": False}
        if max_summary_sentences is not None and max_summary_sentences > 0:
            summary_kwargs["sentences"] = max_summary_sentences

        try:
            logger.info(f"Searching wikipedia: {query}, num_results={num_search_pages}, max_content_words={max_content_words}, max_summary_sentences={max_summary_sentences}")

            search_results = wikipedia.search(query, results=num_search_pages)
            logger.info(f"Search results: {search_results}")
            if not search_results:
                return {"results": [], "error": "No search results found."}

            results = []
            for title in search_results:
                try:
                    page = wikipedia.page(title, auto_suggest=False)
                    summary = wikipedia.summary(title, **summary_kwargs)
                    # _truncate_content is not defined in this class
                    # (presumably inherited from SearchBase).
                    display_content = self._truncate_content(page.content, max_content_words)

                    results.append({
                        "title": page.title,
                        "summary": summary,
                        "content": display_content,
                        "url": page.url,
                    })
                except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError) as e:
                    # Best effort: an ambiguous or missing title must not abort
                    # the whole search, but leave a trace of what was dropped.
                    logger.warning(f"Skipping Wikipedia page '{title}': {type(e).__name__}")
                    continue

            return {"results": results, "error": None}

        except Exception as e:
            # Boundary handler: callers receive an error payload, not an exception.
            logger.error(f"Error searching Wikipedia: {str(e)}")
            return {"results": [], "error": str(e)}
| |
|
|
class WikipediaSearchTool(Tool):
    """Tool named ``wikipedia_search`` that delegates calls to a ``SearchWiki`` instance."""

    name: str = "wikipedia_search"
    description: str = "Search Wikipedia for relevant articles and content"
    # Input schema: one entry per accepted argument, each with a type and a description.
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to look up on Wikipedia",
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5",
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None",
        },
        "max_summary_sentences": {
            "type": "integer",
            "description": "Maximum number of sentences in the summary. None means no limit. Default: None",
        },
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_wiki: SearchWiki = None):
        """Store the ``SearchWiki`` instance that ``__call__`` will delegate to."""
        super().__init__()
        self.search_wiki = search_wiki

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, max_summary_sentences: int = None) -> Dict[str, Any]:
        """
        Run a Wikipedia search through the wrapped ``SearchWiki`` instance.

        Returns:
            dict: Whatever ``search_wiki.search`` returns, or
            ``{"results": [], "error": <message>}`` if that call raises.

        Raises:
            RuntimeError: If no search instance was supplied (the stored value is falsy).
        """
        backend = self.search_wiki
        if not backend:
            raise RuntimeError("Wikipedia search instance not initialized")

        try:
            outcome = backend.search(query, num_search_pages, max_content_words, max_summary_sentences)
        except Exception as err:
            # Convert any failure into the same result shape the search itself uses.
            return {"results": [], "error": f"Error executing Wikipedia search: {str(err)}"}
        return outcome
|
|
|
|
class WikipediaSearchToolkit(Toolkit):
    """Toolkit containing a single ``WikipediaSearchTool`` backed by one ``SearchWiki``."""

    def __init__(
        self,
        name: str = "WikipediaSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """
        Build the search backend, wrap it in a tool and register it with the toolkit.

        Args:
            name (str): Name passed to the parent toolkit.
            num_search_pages (int, optional): Forwarded to ``SearchWiki``.
            max_content_words (int, optional): Forwarded to ``SearchWiki``.
            max_summary_sentences (int, optional): Forwarded to ``SearchWiki``.
            **kwargs: Extra keyword arguments forwarded to ``SearchWiki``
                (not to the parent toolkit).
        """
        # One backend instance is shared by the tool and by this toolkit.
        wiki_backend = SearchWiki(
            name="SearchWiki",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

        super().__init__(name=name, tools=[WikipediaSearchTool(search_wiki=wiki_backend)])

        # Assigned only after the parent initializer has run.
        self.search_wiki = wiki_backend
| |
|
|
|
|