"""
File: module_ocr.py
Description: Use a vision language model for Optical Character Recognition (OCR) tasks.
Author: Didier Guillevic
Date: 2025-04-06
"""
| |
|
import html
import os
import tempfile

import gradio as gr
import pdf2image

import ocr
| |
|
| | |
| | |
| | |
def process(input_file: str):
    """Run OCR on the given file and return the result.

    Args:
        input_file: Path of the PDF or image file to process, as delivered by
            the ``input_file`` upload component. May be ``None`` (or empty)
            when no file is currently selected.

    Returns:
        Whatever ``ocr.process_file`` returns for the file, or an empty string
        when no file has been provided.
    """
    # The OCR button can be clicked before anything is uploaded (or right
    # after "Clear"); answer with empty output instead of handing ``None``
    # to the OCR backend.
    if not input_file:
        return ""
    return ocr.process_file(input_file)
| |
|
| | |
| | |
| | |
def preview_file(file):
    """Build a preview (image path and status text) for an uploaded file.

    Args:
        file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing its path through a
            ``name`` attribute. ``None`` when nothing is uploaded.

    Returns:
        A ``(image_path, status)`` tuple:

        * ``(None, None)`` when ``file`` is ``None``;
        * ``(path, None)`` for jpg/jpeg/png/gif/bmp files (the file itself);
        * ``(png_path, "PDF Preview: <name>")`` for a PDF, where ``png_path``
          is a temporary PNG rendering of the first page;
        * ``(None, "<p>…</p>")`` with an HTML message when the PDF cannot be
          rendered or the file type has no preview.
    """
    if file is None:
        return None, None

    # Accept plain paths as well as file-like wrappers that carry the path
    # in ``.name``.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name

    # splitext only looks at the last path component, so dots in directory
    # names and files without an extension are handled correctly.
    extension = os.path.splitext(file_path)[1].lower().lstrip('.')

    if extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
        # Image files can be displayed directly.
        return file_path, None

    if extension == 'pdf':
        # This is the UI boundary: any rendering failure is reported to the
        # user as a status message rather than raised.
        try:
            # Only the first page is rendered for the preview.
            pages = pdf2image.convert_from_path(
                file_path,
                first_page=1,
                last_page=1,
                dpi=150
            )
            if not pages:
                return None, "<p>Could not convert PDF to image</p>"

            # delete=False: the PNG must still exist after this function
            # returns so that its path can be used by the caller.
            # Writing through the open handle (instead of re-opening the file
            # by name) also works on platforms that forbid a second open.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
                pages[0].save(tmp_file, format='PNG')
        except Exception as e:
            # The message ends up in an HTML component: escape it.
            return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"

        # The temporary file is closed (and flushed) at this point.
        pdf_name = html.escape(os.path.basename(file_path))
        return tmp_file.name, f"PDF Preview: {pdf_name}"

    shown_extension = html.escape(extension) if extension else 'unknown'
    return None, f"<p>Preview not available for {shown_extension} files</p>"
| |
|
| |
|
| | |
| | |
| | |
# Gradio user interface: file upload with preview, OCR output, action buttons,
# examples, and the event wiring between them.
# NOTE(review): the indentation of this section was lost in extraction; the
# nesting below is reconstructed. Confirm the layout against the original,
# in particular that output_text sits in the Row next to the Column.
with gr.Blocks() as demo:

    # Upload + preview widgets in one column, OCR output beside them
    with gr.Row():
        with gr.Column():
            input_file = gr.File(
                label="Upload a PDF or image file",
                file_types=[".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp"],
                scale=1)
            # Filled by preview_file(): image path and status message
            preview_image = gr.Image(label="Preview", show_label=True)
            preview_text = gr.HTML(label="Status")
        output_text = gr.Textbox(label="OCR output", scale=2)

    # Action buttons
    with gr.Row():
        ocr_btn = gr.Button(value="OCR", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Example files, shown in a collapsed accordion
    with gr.Accordion("Examples", open=False):
        # The paths are relative: presumably resolved against the working
        # directory the app is started from — verify the files ship with it.
        examples = gr.Examples(
            [
                ['./scanned_doc.pdf',],
                ['./passport_jp.png',]
            ],
            inputs=[input_file,],
            outputs=[output_text,],
            fn=process,
            cache_examples=False,
            label="Examples"
        )

    # Refresh the preview whenever the selected file changes
    input_file.change(
        fn=preview_file,
        inputs=[input_file],
        outputs=[preview_image, preview_text]
    )

    # Run OCR on the selected file and show the result in the output box
    ocr_btn.click(
        fn=process,
        inputs=[input_file,],
        outputs=[output_text,]
    )
    # Reset the file input and the OCR output. The preview widgets are not
    # listed as outputs here.
    # NOTE(review): presumably the change event fired by clearing input_file
    # resets them through preview_file(None) — confirm.
    clear_btn.click(
        fn=lambda : (None, ''),
        inputs=[],
        outputs=[input_file, output_text]
    )


# Start the app only when this module is executed as a script.
if __name__ == '__main__':
    demo.launch()
| |
|