# Streamlit client for a remote OCR service.
#
# The user uploads a PDF or image in the sidebar; the file is POSTed to the
# OCR endpoint and the returned Markdown is rendered and offered for download.

import streamlit as st
import requests
from PIL import Image

# Endpoint that receives the multipart upload and returns the extracted text.
OCR_API_URL = "https://hammad712-urdu-ocr-app.hf.space/upload"

# File-name suffixes that are previewed as images before being uploaded.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever on a stalled connection; the read timeout is generous because OCR
# on a large document can be slow. Tune as needed.
REQUEST_TIMEOUT = (10, 300)


def _is_image(filename):
    """Return True when *filename* ends with one of IMAGE_EXTENSIONS."""
    return filename.lower().endswith(IMAGE_EXTENSIONS)


st.title("OCR Extraction Client")
st.write(
    """
This app lets you upload a PDF or image file. The file is sent to a FastAPI endpoint for OCR extraction,
and then the extracted text is returned as a Markdown file.
"""
)

st.sidebar.header("Upload Document")
uploaded_file = st.sidebar.file_uploader(
    "Upload a PDF or image file",
    type=["pdf", "png", "jpg", "jpeg", "webp"],
)
process_button = st.sidebar.button("Process Document")

if uploaded_file is not None and process_button:
    st.info(f"Processing file: **{uploaded_file.name}**")
    is_image = _is_image(uploaded_file.name)

    if is_image:
        # The preview is best-effort: a file Pillow cannot open is still
        # sent to the OCR service, so only report the problem and carry on.
        try:
            image = Image.open(uploaded_file)
            # NOTE(review): `use_column_width` appears to be deprecated in
            # recent Streamlit releases in favour of `use_container_width`;
            # kept unchanged here -- confirm against the installed version.
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Error displaying image: {e}")

    response = None
    with st.spinner("Sending file to OCR service..."):
        # getvalue() returns the full upload regardless of how far the
        # preview above advanced the file pointer.
        files = {
            "file": (
                uploaded_file.name,
                uploaded_file.getvalue(),
                uploaded_file.type,
            )
        }
        try:
            response = requests.post(
                OCR_API_URL, files=files, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            # Connection failures and timeouts are reported in the UI
            # instead of crashing the app with a traceback.
            st.error(f"Could not reach the OCR service: {exc}")

    if response is None:
        # The request itself failed; the error has already been shown.
        pass
    elif response.status_code == 200:
        st.success("OCR extraction complete!")
        # Decode explicitly as UTF-8; errors="replace" keeps a stray
        # malformed byte from aborting the whole result.
        md_content = response.content.decode("utf-8", errors="replace")

        if is_image:
            st.markdown("### Extracted Text from Image")
        else:
            st.markdown("### Extracted Markdown Text")
        st.markdown(md_content)

        st.download_button(
            label="Download Markdown File",
            data=md_content,
            file_name="output.md",
            mime="text/markdown",
        )
    else:
        st.error(f"Error: {response.status_code} {response.text}")
elif process_button:
    # The button was pressed before any file was chosen.
    st.warning("Please upload a file before processing.")