washeed
/

ocr

Model card Files Files and versions

ocr / ocr.py

washeed's picture

Upload 18 files

b692870 verified almost 2 years ago

history blame contribute delete

1.35 kB

	import easyocr
	import cv2
	import os


	def extract_text_from_image(image_path, language='en'):
	"""
	Extracts text from an image using EasyOCR.

	Args:
	image_path (str): Path to the image file.
	language (str, optional): Language(s) to be recognized. Defaults to 'en' (English).

	Returns:
	list: List of recognized text strings.
	"""

	reader = easyocr.Reader([language])
	reader.detector = reader.initDetector('best\BEST.pth')

	image = cv2.imread(image_path)
	result = reader.readtext(image, detail=0) # Extract only recognized texts

	return result


	if __name__ == '__main__':
	    # Folder containing the page images to OCR. os.path.join replaces the
	    # original "inference_results\Anil ..." literal: '\A' is an invalid
	    # escape sequence, and a backslash is a path separator only on Windows.
	    folder_path = os.path.join(
	        "inference_results",
	        "Anil Maheshwari - Data analytics-McGraw-Hill Education (2017)",
	    )

	    # os.listdir returns entries in arbitrary order; sort them so the text
	    # is concatenated in a reproducible (lexicographic) file order.
	    image_names = sorted(
	        name for name in os.listdir(folder_path)
	        if name.endswith((".jpg", ".png"))
	    )

	    # Collect one text chunk per image and join once at the end.
	    page_texts = []
	    for filename in image_names:
	        image_path = os.path.join(folder_path, filename)

	        # Extract text for the current image.
	        extracted_text = extract_text_from_image(image_path)

	        # One recognized string per line, then a blank line between images.
	        page_texts.append("\n".join(extracted_text) + "\n\n")

	    # All extracted text, concatenated in file order.
	    all_extracted_text = "".join(page_texts)