"""Text_Classification_Model_Deployment.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/16FpeDQ0i5k_mttZZgxLDHVOMEd-6qGRU

# **Text Classification Model Deployment using FastAPI and Gradio**
"""

# ### Importing Libraries
|
|
| |
# Core scientific stack for data handling and plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# scikit-learn utilities: splitting, scaling/encoding, and evaluation metrics.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model persistence.
import joblib
import pickle
|
|
import os

# Log where the script is running from so the .joblib artifacts loaded
# below can be located relative to the working directory.
print("Current Working Directory: ", os.getcwd())
print("Files in Current Directory: ", os.listdir('.'))
|
|
import nltk

# Fetch the NLTK resources required by the text-cleaning pipeline:
# tokenizer models, WordNet + multilingual WordNet (lemmatization),
# and the English stopword list. Same corpora, same order as before.
for resource in ('punkt', 'wordnet', 'omw-1.4', 'stopwords'):
    nltk.download(resource)
|
|
| |
# Imports for the cleaning/serving pipeline. `joblib` and `nltk` are
# re-imported here because this cell was exported from a notebook; the
# duplicates are harmless and kept to preserve the file's structure.
import re
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from fastapi import FastAPI
|
| |
def clean_text(text):
    """Normalize raw text for TF-IDF vectorization.

    Steps: strip HTML tags, drop non-alphabetic characters, lowercase,
    remove English stopwords, and lemmatize the surviving tokens.

    Args:
        text: Raw input string (may contain HTML markup).

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    # Remove HTML tags such as <br /> that appear in scraped text.
    text = re.sub(r'<.*?>', '', text)
    # BUG FIX: the original passed re.I|re.A positionally, which binds to
    # re.sub's *count* parameter (4th positional), silently capping the
    # number of substitutions at 258 instead of setting flags. Pass them
    # as keyword flags so every non-alphabetic character is removed.
    text = re.sub(r'[^a-zA-Z\s]', '', text, flags=re.I | re.A).lower()
    tokens = text.split()
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    # Keep only non-stopword tokens, reduced to their WordNet lemma.
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
|
|
| |
# Pre-trained logistic-regression text classifier, fitted on TF-IDF
# features. NOTE(review): the " (1)" suffix looks like a duplicated
# browser download -- confirm this is the intended artifact file.
model = joblib.load('text_classification_LR_model (1).joblib')

# TF-IDF vectorizer fitted on the training corpus; it must be the exact
# vectorizer used when the model above was trained so feature indices match.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')
|
|
| |
def preprocess(input_text):
    """Clean a raw string and project it into the fitted TF-IDF space.

    Args:
        input_text: Raw text to vectorize.

    Returns:
        A sparse feature matrix with one row, suitable for ``model.predict``.
    """
    cleaned = clean_text(input_text)
    # transform() expects an iterable of documents, hence the 1-element list.
    return tfidf_vectorizer.transform([cleaned])
|
|
| |
def predict_class(input_text):
    """Return the predicted category name for *input_text*.

    The label indices produced by the model are mapped onto the four
    news categories used at training time.
    """
    # Index-to-label mapping; order must match the training label encoding.
    classes = ['World', 'Sports', 'Business', 'Sci/Tech']
    features = preprocess(input_text)
    label_index = model.predict(features)[0]
    return classes[label_index]
|
|
| |
app = FastAPI()


@app.get('/')
async def welcome():
    """Landing/health-check endpoint."""
    return "Welcome to the Text Classification API"


@app.post('/classify_text')
async def classify_text(input_text: str):
    """Classify the given text and return its predicted category."""
    return {"classification": predict_class(input_text)}
|
|
import gradio as gr

# Simple web UI wired to the same prediction function as the API.
iface = gr.Interface(
    fn=predict_class,
    inputs="text",
    outputs="text",
    title="Text Classification API",
    description="Enter text to classify it into categories: World, Sports, Business, Sci/Tech.",
)
iface.launch()
|
|
|
|
|
|