Create model.py

2f81cdc verified 9 months ago

9.7 kB

	import re
	import urllib.request
	from io import StringIO
	from urllib.parse import quote

	import numpy as np
	import pandas as pd
	import pyttsx3
	import requests
	import speech_recognition as sr
	from bs4 import BeautifulSoup
	from googlesearch import search
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity

	class HybridChatBot:
	    """Chatbot combining TF-IDF question matching, web search and voice I/O.

	    A user message is answered by finding the stored question whose TF-IDF
	    vector has the highest cosine similarity to the message. Messages that
	    contain one of ``SEARCH_TRIGGERS`` are routed to a Google search
	    instead, and the leading text of each result page is returned. Replies
	    can be spoken with pyttsx3 and messages dictated through the
	    SpeechRecognition recognizer.

	    Attributes:
	        dataset_url: URL of the CSV/JSON dataset passed to the constructor.
	        qa_pairs: Mapping of lower-cased question to its answer.
	        vectorizer: ``TfidfVectorizer`` fitted on the keys of ``qa_pairs``.
	        X: TF-IDF matrix of the known questions; ``None`` until a
	            successful ``train()``.
	        similarity_threshold: A stored answer is returned only when the
	            best cosine similarity is strictly greater than this value.
	        recognizer: ``speech_recognition.Recognizer`` used for dictation.
	        engine: pyttsx3 engine used for speech output.
	    """

	    # Phrases that send a chat message to web search instead of the Q&A index.
	    SEARCH_TRIGGERS = ("search the web for", "find online")
	    # Network timeouts in seconds (dataset download / each result page).
	    DATASET_TIMEOUT = 10
	    PAGE_TIMEOUT = 5
	    # Class-level default so the attribute exists even without __init__.
	    similarity_threshold = 0.5

	    def __init__(self, dataset_url=None, similarity_threshold=0.5):
	        """Set up the vectorizer and voice engines and optionally load data.

	        Args:
	            dataset_url: Optional URL of a ``.csv`` or ``.json`` dataset
	                with ``question`` and ``answer`` columns. When given, the
	                dataset is downloaded and the model trained immediately.
	            similarity_threshold: Minimum cosine similarity (exclusive)
	                required to return a stored answer.
	        """
	        self.dataset_url = dataset_url
	        self.similarity_threshold = similarity_threshold
	        self.qa_pairs = {}
	        self.vectorizer = TfidfVectorizer()
	        self.X = None
	        self.recognizer = sr.Recognizer()
	        self.engine = pyttsx3.init()

	        # Voice engine settings
	        voices = self.engine.getProperty('voices')
	        if voices:
	            # Only select a voice when the driver reports one; indexing an
	            # empty list would abort construction with IndexError.
	            self.engine.setProperty('voice', voices[0].id)
	        self.engine.setProperty('rate', 150)

	        if dataset_url:
	            self.load_dataset()
	            self.train()

	    def load_dataset(self):
	        """Download ``self.dataset_url`` and merge its rows into ``qa_pairs``.

	        The format is chosen from the URL suffix (``.csv`` or ``.json``).
	        The data must provide ``question`` and ``answer`` columns. Rows
	        where either value is missing are skipped. Questions are converted
	        to ``str`` and lower-cased before being used as keys.

	        This is best-effort: any failure (network, HTTP status, parsing,
	        missing columns) is printed and the method returns normally.
	        """
	        try:
	            # A timeout keeps an unresponsive host from hanging start-up.
	            response = requests.get(
	                self.dataset_url, timeout=self.DATASET_TIMEOUT
	            )
	            response.raise_for_status()

	            if self.dataset_url.endswith('.csv'):
	                data = pd.read_csv(StringIO(response.text))
	            elif self.dataset_url.endswith('.json'):
	                data = pd.read_json(StringIO(response.text))
	            else:
	                print("File format not supported")
	                return

	            # Drop incomplete rows up front: a single NaN question used to
	            # raise on ``.lower()`` and abort the rest of the load.
	            data = data.dropna(subset=["question", "answer"])
	            for question, answer in zip(data["question"], data["answer"]):
	                self.qa_pairs[str(question).lower()] = answer

	            print(f"Loaded {len(self.qa_pairs)} question-answer pairs")

	        except Exception as e:
	            print(f"Error loading dataset: {e}")

	    def train(self):
	        """Fit the TF-IDF vectorizer on the currently known questions.

	        On success ``self.X`` holds one row per key of ``qa_pairs`` (in
	        dictionary order). When there is no data, or the vectorizer cannot
	        be fitted, a message is printed; a failed fit also resets
	        ``self.X`` to ``None``.
	        """
	        if not self.qa_pairs:
	            print("No data available for training!")
	            return

	        questions = list(self.qa_pairs)
	        try:
	            self.X = self.vectorizer.fit_transform(questions)
	        except ValueError as e:
	            # scikit-learn raises ValueError when the documents produce an
	            # empty vocabulary (for example only one-character tokens).
	            self.X = None
	            print(f"Training failed: {e}")
	            return
	        print("Model trained on loaded data")

	    def add_qa_pair(self, question, answer):
	        """Store a question/answer pair and retrain on the full set.

	        Args:
	            question: Question text; it is lower-cased before being stored.
	            answer: Reply returned when the question is matched.
	        """
	        self.qa_pairs[question.lower()] = answer
	        self.train()

	    def web_search(self, query, num_results=3, max_words=200):
	        """Search Google for *query* and return text excerpts of the results.

	        Args:
	            query: Search terms. Surrounding whitespace is ignored.
	            num_results: Number of results requested from the search.
	            max_words: Maximum number of words kept from each page.

	        Returns:
	            A list of ``{'url': ..., 'content': ...}`` dicts, one per page
	            that could be fetched. An empty list is returned for a blank
	            query. If the search itself fails, the pages collected so far
	            are returned, or ``None`` when there are none.
	        """
	        query = query.strip()
	        if not query:
	            return []

	        print(f"\nSearching the web: {query}")
	        search_results = []
	        try:
	            # Perform Google search
	            for url in search(query, num_results=num_results, lang='en'):
	                try:
	                    content = self._fetch_page_text(url, max_words)
	                except Exception as e:
	                    # One unreachable page must not abort the whole search.
	                    print(f"Error processing {url}: {e}")
	                    continue
	                search_results.append({'url': url, 'content': content})
	        except Exception as e:
	            print(f"Search error: {e}")
	            # Keep whatever was fetched before the search broke down.
	            return search_results or None

	        return search_results

	    def _fetch_page_text(self, url, max_words):
	        """Download *url* and return its first *max_words* words of text."""
	        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
	        with urllib.request.urlopen(req, timeout=self.PAGE_TIMEOUT) as response:
	            html = response.read()

	        soup = BeautifulSoup(html, 'html.parser')

	        # Remove unwanted elements (scripts, styles, etc.)
	        for tag in soup(["script", "style", "iframe", "nav", "footer"]):
	            tag.extract()

	        words = soup.get_text(separator=' ', strip=True).split()
	        return ' '.join(words[:max_words])

	    def _answer_from_web(self, user_input):
	        """Strip the trigger phrase from *user_input* and format web results."""
	        # Removal must be case-insensitive because detection is: otherwise
	        # "Search The Web For cats" would search for the whole sentence.
	        pattern = "|".join(re.escape(t) for t in self.SEARCH_TRIGGERS)
	        query = re.sub(pattern, "", user_input, flags=re.IGNORECASE).strip()

	        search_results = self.web_search(query)
	        if not search_results:
	            return "Couldn't find any information online."

	        entries = [
	            f"\n{i}. {result['content']}\n(Source: {result['url']})\n"
	            for i, result in enumerate(search_results, 1)
	        ]
	        response = "Here's what I found online:\n" + "".join(entries)
	        return response[:2000]  # Limit response length

	    def get_response(self, user_input):
	        """Return the bot's reply to *user_input*.

	        Args:
	            user_input: The user's message.

	        Returns:
	            A fixed notice when no question/answer data is available;
	            formatted web results when the message contains one of
	            ``SEARCH_TRIGGERS`` (matched case-insensitively); the stored
	            answer of the most similar question when its cosine similarity
	            exceeds ``similarity_threshold``; otherwise a hint on how to
	            request a web search.
	        """
	        not_trained = "I'm not trained yet. Please add questions and answers."
	        if not self.qa_pairs:
	            return not_trained

	        lowered = user_input.lower()

	        # Check if user wants to perform web search
	        if any(trigger in lowered for trigger in self.SEARCH_TRIGGERS):
	            return self._answer_from_web(user_input)

	        if self.X is None:
	            # Data exists but no successful fit has happened yet.
	            return not_trained

	        # Regular question-answer search
	        user_vec = self.vectorizer.transform([lowered])
	        similarities = cosine_similarity(user_vec, self.X)
	        best_match_idx = int(np.argmax(similarities))
	        best_match_score = similarities[0, best_match_idx]

	        if best_match_score > self.similarity_threshold:
	            # Rows of X follow the key order of qa_pairs used in train().
	            best_question = list(self.qa_pairs)[best_match_idx]
	            return self.qa_pairs[best_question]
	        return "I don't know the answer to this question. Would you like me to search online? (Say 'search the web for...')"

	    def text_to_speech(self, text):
	        """Speak *text* through the pyttsx3 engine and wait until it finishes."""
	        self.engine.say(text)
	        self.engine.runAndWait()

	    def speech_to_text(self):
	        """Record one phrase from the microphone and transcribe it.

	        Returns:
	            The recognized text, or ``None`` when the microphone cannot be
	            opened, nothing is said within 5 seconds, the audio is not
	            understood, or the recognition service fails.
	        """
	        try:
	            with sr.Microphone() as source:
	                print("\nSpeak now...")
	                self.recognizer.adjust_for_ambient_noise(source)
	                audio = self.recognizer.listen(source, timeout=5)
	        except sr.WaitTimeoutError:
	            print("Timeout expired")
	            return None
	        except (OSError, AttributeError) as e:
	            # NOTE(review): SpeechRecognition is understood to raise
	            # AttributeError when PyAudio is missing and OSError when no
	            # input device exists - confirm against the installed version.
	            print(f"Microphone unavailable: {e}")
	            return None

	        # Transcribe after the microphone has been released.
	        try:
	            text = self.recognizer.recognize_google(audio, language="en-US")
	        except sr.UnknownValueError:
	            print("Speech not recognized")
	            return None
	        except sr.RequestError:
	            print("Recognition service error")
	            return None

	        print(f"Recognized: {text}")
	        return text

	    def _reply_to(self, user_input, fallback, speak_fallback=False):
	        """Answer one message; return False when the user asked to quit."""
	        if user_input.lower() in ("exit", "stop"):
	            return False

	        response = self.get_response(user_input)
	        if response:
	            print(f"\nBot: {response}")
	            self.text_to_speech(response)
	        else:
	            print(f"\nBot: {fallback}")
	            if speak_fallback:
	                self.text_to_speech(fallback)
	        return True

	    def run(self):
	        """Run the interactive console menu until the user exits.

	        The loop ends on menu option 6, on the message "exit" or "stop",
	        on Ctrl+C, or when standard input is closed.
	        """
	        print("\n" + "="*50)
	        print("WELCOME TO INTELLIGENT CHATBOT".center(50))
	        print("="*50)

	        # NOTE(review): the mode is only displayed and toggled; options 1
	        # and 2 are available regardless of its value.
	        current_mode = "text"
	        while True:
	            print("\n" + "-"*50)
	            print(f"Current input mode: {current_mode.upper()}")
	            print("[1] Send text message")
	            print("[2] Speak to the bot")
	            print("[3] Switch input mode")
	            print("[4] Teach the bot a new answer")
	            print("[5] Web search")
	            print("[6] Exit")

	            try:
	                choice = input("Choose action (1-6): ").strip()

	                if choice == "1":
	                    user_input = input("\nYour message: ")
	                    if not self._reply_to(
	                        user_input,
	                        "I don't know what to say. Would you like to teach me?",
	                    ):
	                        break

	                elif choice == "2":
	                    user_input = self.speech_to_text()
	                    if user_input and not self._reply_to(
	                        user_input,
	                        "I don't know how to respond to that.",
	                        speak_fallback=True,
	                    ):
	                        break

	                elif choice == "3":
	                    current_mode = "voice" if current_mode == "text" else "text"
	                    print(f"\nMode changed to: {current_mode.upper()}")

	                elif choice == "4":
	                    print("\nTeaching the bot:")
	                    question = input("Enter question: ").strip()
	                    answer = input("Enter answer: ").strip()
	                    if not question or not answer:
	                        print("Question and answer must not be empty.")
	                    else:
	                        self.add_qa_pair(question, answer)
	                        # train() leaves X as None when fitting failed and
	                        # has already printed the reason.
	                        if self.X is not None:
	                            print("Bot successfully trained!")

	                elif choice == "5":
	                    query = input("\nEnter search query: ")
	                    search_results = self.web_search(query)
	                    if search_results:
	                        print("\nSearch results:")
	                        for i, result in enumerate(search_results, 1):
	                            print(f"\n{i}. {result['content']}\n(Source: {result['url']})\n")
	                    else:
	                        print("\nNothing found.")

	                elif choice == "6":
	                    print("\nShutting down...")
	                    break

	                else:
	                    print("\nPlease choose an option between 1 and 6")

	            except (KeyboardInterrupt, EOFError):
	                # EOFError: stdin was closed (e.g. piped input ran out).
	                print("\nShutting down...")
	                break

	if __name__ == "__main__":
	    # Script entry point: build a bot from the remote dataset and start the
	    # interactive console loop.
	    # NOTE(review): "user/repo" looks like a placeholder path - confirm the
	    # real dataset location before running.
	    DATASET_URL = (
	        "https://raw.githubusercontent.com/user/repo/main/qa_data.csv"
	    )
	    bot = HybridChatBot(dataset_url=DATASET_URL)
	    bot.run()