Spaces:

Intel
/

vacaigent

Sleeping

vacaigent / tools /browser_tools.py

Benjamin Consolvo

init1

78797ac 12 months ago

1.56 kB

	import json

	import requests
	import streamlit as st
	from crewai import Agent, Task
	from langchain.tools import tool
	from unstructured.partition.html import partition_html


	class BrowserTools():
	    """Container for the website-scraping tool handed to agents."""

	    # NOTE: the decorated function's docstring and signature are left exactly
	    # as they were. LangChain's `@tool` builds the tool description (and the
	    # argument schema) from them, so editing either changes what the LLM sees.
	    @tool("Scrape website content")
	    def scrape_and_summarize_website(website):
	        """Useful to scrape and summarize a website content"""
	        # Parameter:
	        #   website -- URL of the page to fetch through the ScrapingAnt API.
	        # Returns:
	        #   The per-chunk summaries joined by blank lines, or a short error
	        #   message string when ScrapingAnt answers with an HTTP error status.
	        # Raises:
	        #   Whatever `requests` raises on connection problems or timeout.
	        chunk_size = 8000      # characters of page text summarized per task
	        timeout_seconds = 60   # avoid hanging forever on a stalled scrape

	        # Let requests build the query string: this adds the `=` that was
	        # missing after `url` and percent-encodes the target, so a website
	        # containing `?`, `&` or `#` no longer corrupts the request.
	        response = requests.get(
	            "https://api.scrapingant.com/v2/general",
	            params={
	                "url": website,
	                "x-api-key": st.secrets['SCRAPINGANT_API_KEY'],
	            },
	            headers={'cache-control': 'no-cache'},
	            timeout=timeout_seconds,
	        )
	        if not response.ok:
	            # Do not spend LLM calls summarizing an error page.
	            return f"Failed to scrape {website}: HTTP {response.status_code}"

	        # Reduce the HTML to its text elements and glue them together.
	        elements = partition_html(text=response.text)
	        text = "\n\n".join(str(element) for element in elements)

	        # Split the text into fixed-size pieces, one summarization task each.
	        chunks = [
	            text[start:start + chunk_size]
	            for start in range(0, len(text), chunk_size)
	        ]

	        summaries = []
	        for chunk in chunks:
	            # A new Agent is built for every chunk, as in the original code.
	            # NOTE(review): hoisting it out of the loop might share agent
	            # state between chunks -- confirm against crewai before changing.
	            agent = Agent(
	                role='Principal Researcher',
	                goal='Do amazing researches and summaries based on the content you are working with',
	                backstory="You're a Principal Researcher at a big company and you need to do a research about a given topic.",
	                allow_delegation=False,
	            )
	            task = Task(
	                agent=agent,
	                description=f'Analyze and summarize the content bellow, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}',
	            )
	            summaries.append(task.execute())
	        return "\n\n".join(summaries)