Spaces:

JatinAutonomousLabs
/

Excel_AI_Assistant

Paused

Upload 4 files

9d08bab verified 6 months ago

879 Bytes

	#!/usr/bin/env python3
	"""Text Processing Plugin"""
	import re
	from typing import List

	class TextProcessor:
	    """Clean and process text data.

	    The class holds no instance state; every method is a pure function
	    of its ``text`` argument.
	    """

	    # Patterns are compiled once at class-creation time and shared by
	    # every call, rather than being looked up on each invocation.
	    _WHITESPACE_RE = re.compile(r'\s+')
	    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
	    _URL_RE = re.compile(r'https?://[^\s]+')

	    def clean_text(self, text: str) -> str:
	        """Collapse whitespace runs and trim the ends.

	        Every run of whitespace (spaces, tabs, newlines, ...) is replaced
	        by a single space, then leading/trailing whitespace is removed.
	        No other characters are altered or removed.

	        Args:
	            text: The string to normalize.

	        Returns:
	            The normalized string; ``""`` if ``text`` is empty or
	            contains only whitespace.
	        """
	        collapsed = self._WHITESPACE_RE.sub(' ', text)
	        return collapsed.strip()

	    def extract_emails(self, text: str) -> List[str]:
	        """Extract email-address-like substrings from text.

	        Args:
	            text: The string to scan.

	        Returns:
	            All non-overlapping matches in order of appearance
	            (duplicates are kept); an empty list if none are found.
	        """
	        return self._EMAIL_RE.findall(text)

	    def extract_urls(self, text: str) -> List[str]:
	        """Extract ``http://`` and ``https://`` URLs from text.

	        A match runs from the scheme up to the next whitespace character,
	        so punctuation directly following a URL (e.g. a trailing ``.``
	        or ``)``) is included in the returned string.

	        Args:
	            text: The string to scan.

	        Returns:
	            All matches in order of appearance; an empty list if none
	            are found.
	        """
	        return self._URL_RE.findall(text)

	    def tokenize(self, text: str) -> List[str]:
	        """Split text into lowercase, whitespace-separated tokens.

	        Punctuation is not separated from words: ``"Hi, there"`` yields
	        ``["hi,", "there"]``.

	        Args:
	            text: The string to tokenize.

	        Returns:
	            The lowercased tokens; an empty list if ``text`` is empty or
	            contains only whitespace.
	        """
	        return text.lower().split()