Spaces:

Hammad712
/

OCR-APP

Sleeping

App Files Files Community

OCR-APP / app.py

Hammad712

Update app.py

245632e verified about 1 year ago

raw

history blame contribute delete

2.1 kB

	import streamlit as st
	import requests
	from PIL import Image

	# Streamlit client for a remote OCR service: the user uploads a PDF or image
	# in the sidebar, the file is POSTed to the service, and the returned text is
	# rendered as Markdown and offered as a download.

	# Endpoint of the OCR service that receives the uploaded document.
	API_URL = "https://hammad712-urdu-ocr-app.hf.space/upload"
	# File extensions treated as images (previewed, and given their own heading).
	IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")
	# (connect, read) timeouts in seconds. Without a timeout requests waits
	# forever if the service stalls; the read timeout is generous because OCR of
	# a large document may take a while.
	REQUEST_TIMEOUT = (10, 300)

	st.title("OCR Extraction Client")
	st.write(
	    """
	This app lets you upload a PDF or image file. The file is sent to a FastAPI endpoint for OCR extraction,
	and then the extracted text is returned as a Markdown file.
	"""
	)

	# Sidebar for uploading the document and processing
	st.sidebar.header("Upload Document")
	uploaded_file = st.sidebar.file_uploader(
	    "Upload a PDF or image file",
	    type=["pdf", "png", "jpg", "jpeg", "webp"],
	)
	process_button = st.sidebar.button("Process Document")

	if process_button and uploaded_file is None:
	    # A click without a file used to do nothing at all; tell the user why.
	    st.warning("Please upload a PDF or image file before processing.")

	if uploaded_file is not None and process_button:
	    st.info(f"Processing file: {uploaded_file.name}")

	    # Decide once whether this is an image; used for the preview and heading.
	    is_image = uploaded_file.name.lower().endswith(IMAGE_EXTENSIONS)

	    if is_image:
	        # The preview is best-effort: a failure here is reported to the user
	        # but must not prevent the file from being sent for OCR.
	        try:
	            image = Image.open(uploaded_file)
	            st.image(image, caption="Uploaded Image", use_column_width=True)
	        except Exception as e:
	            st.error(f"Error displaying image: {e}")

	    response = None
	    with st.spinner("Sending file to OCR service..."):
	        # getvalue() yields the complete upload as bytes, so the preview's
	        # read of the same file object above does not affect the payload.
	        files = {
	            "file": (
	                uploaded_file.name,
	                uploaded_file.getvalue(),
	                uploaded_file.type,
	            )
	        }
	        try:
	            response = requests.post(
	                API_URL, files=files, timeout=REQUEST_TIMEOUT
	            )
	        except requests.RequestException as exc:
	            # Connection failures and timeouts become a readable message
	            # instead of an unhandled traceback.
	            st.error(f"Could not reach the OCR service: {exc}")

	    if response is not None and response.status_code == 200:
	        st.success("OCR extraction complete!")
	        # Replace undecodable bytes rather than crashing on a malformed body.
	        md_content = response.content.decode("utf-8", errors="replace")

	        # Display output based on file type.
	        if is_image:
	            st.markdown("### Extracted Text from Image")
	        else:
	            st.markdown("### Extracted Markdown Text")
	        st.markdown(md_content)

	        st.download_button(
	            label="Download Markdown File",
	            data=md_content,
	            file_name="output.md",
	            mime="text/markdown",
	        )
	    elif response is not None:
	        # The service answered, but not with success: show status and body.
	        st.error(f"Error: {response.status_code} {response.text}")