| |
|
|
import base64
import json
import mimetypes
import os
from pathlib import Path

import gradio as gr
from openai import OpenAI
|
|
# The API key is read from the environment so that no secret lives in the
# source tree.
# NOTE(review): a literal key used to be committed on this line; treat it as
# compromised and revoke it in the OpenAI dashboard.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Model name passed to every chat-completion request made by this script.
MODEL = "gpt-5.1"

# Shared client used by upload_pdf() and extract(). If API_KEY is None the
# OpenAI constructor performs its own OPENAI_API_KEY lookup and raises when
# no key can be found, so a missing key fails at startup.
client = OpenAI(api_key=API_KEY)
|
|
|
|
def upload_pdf(path) -> str:
    """Upload the file at *path* through the OpenAI Files API and return its id.

    The upload goes through the module-level ``client`` with
    ``purpose="assistants"``.

    Args:
        path: Filesystem path of the PDF to upload.

    Returns:
        The ``id`` attribute of the object returned by ``client.files.create``.

    Raises:
        OSError: If *path* cannot be opened for reading.
    """
    # The context manager guarantees the handle is closed once the upload
    # call returns or raises, instead of leaking it until garbage collection.
    with open(path, "rb") as pdf_file:
        uploaded = client.files.create(file=pdf_file, purpose="assistants")
    return uploaded.id
|
|
|
|
| |
def prompt() -> str:
    """Return the extraction instructions sent to the model with each document.

    The text contains, in order: the target JSON schema, the ship-from rules,
    the carrier/equipment rule, the inventory rules, the total-quantity rule
    and the custom-fields rule. It is assembled from adjacent string literals,
    so the function always returns the same value.
    """
    return (
        "Extract structured JSON from the attached logistics document. Return ONLY valid JSON.\n"
        "{\n"
        " \"po_number\": string|null,\n"
        " \"ship_from_name\": string|null,\n"
        " \"ship_from_email\": string|null,\n"
        " \"carrier_type\": string|null,\n"
        " \"rail_car_number\": string|null,\n"
        " \"total_quantity\": number|null,\n"
        " \"inventories\": [\n"
        " {\n"
        " \"productName\": string|null,\n"
        " \"productCode\": string|null,\n"
        " \"variants\": [\n"
        " {\n"
        " \"dimensions\": string|null,\n"
        " \"pcs_per_pkg\": number|null,\n"
        " \"length_ft\": number|null,\n"
        " \"width\": number|null,\n"
        " \"packages\": number|null,\n"
        " \"pieces\": number|null,\n"
        " \"fbm\": number|string|null\n"
        " }\n"
        " ],\n"
        " \"total_pcs\": number|null,\n"
        " \"total_fbm\": number|string|null\n"
        " }\n"
        " ],\n"
        " \"custom_fields\": {}\n"
        "}\n\n"
        "SHIP FROM RULES:\n"
        "- If explicit fields like 'Origin', 'Ship From' exist, extract that value.\n"
        "- If the document is an email-style inbound notice (header block) and shows:\n"
        " From: Name <email>\n"
        " then ship_from_name = Name, ship_from_email = email.\n"
        "- If only an email exists and no human name, set both fields to that email.\n"
        "- If both Origin and an email sender exist, use Origin for ship_from_name and still capture the email under ship_from_email.\n"
        # ASCII arrows are used for the precedence chain: the separator that
        # was here before was a mis-encoded character, and plain "->" cannot
        # be damaged by another encoding round-trip.
        "- Priority: Origin -> Email Name -> Mill -> Sender block -> null.\n\n"
        "CARRIER / EQUIPMENT RULE:\n"
        "- If the table contains:\n"
        " Equipment id = <value>\n"
        " Mark = <value>\n"
        " then ALWAYS treat 'Equipment id' as the railcar number.\n"
        "- NEVER use 'Mark' as railcar number.\n"
        "- Carrier type must match the carrier text exactly (e.g., CHICAGO RAIL LINK).\n\n"
        "INVENTORY RULES:\n"
        "- Do not merge length groups. Each unique length or dimension is its own variant.\n"
        "- Extract pcs_per_pkg, packages, pieces, fbm exactly as written.\n"
        "- total_pcs = sum of pieces.\n"
        "- total_fbm = sum of fbm.\n\n"
        "TOTAL QUANTITY RULE:\n"
        "- Use explicit totals if they appear.\n"
        "- If no explicit total quantity appears, leave null.\n\n"
        "CUSTOM FIELDS RULE:\n"
        "- Capture all meaningful leftover fields not part of main schema.\n\n"
        "Return ONLY the JSON."
    )
|
|
|
|
|
|
| |
def extract(path):
    """Send the document at *path* to the model and return its JSON answer.

    A path whose suffix is ``.pdf`` is uploaded with ``upload_pdf`` and
    attached by file id. Any other path is read from disk, base64-encoded and
    attached inline as a ``data:`` URL image. In both cases the instructions
    from ``prompt()`` are sent as the text part of a single user message.

    Args:
        path: Filesystem path of the PDF or image to analyse.

    Returns:
        The substring of the model reply running from its first ``{`` to its
        last ``}`` inclusive. The substring is not parsed or validated here.

    Raises:
        ValueError: If the reply is empty or contains no ``{ ... }`` span.
    """
    suffix = Path(path).suffix.lower()

    if suffix == ".pdf":
        attachment = {"type": "file", "file": {"file_id": upload_pdf(path)}}
    else:
        # Build a registered MIME type for the data URL: ".jpg" must become
        # "image/jpeg", not "image/jpg". Extensions that mimetypes does not
        # map to an image type keep the previous "image/<ext>" form.
        guessed, _ = mimetypes.guess_type(str(path))
        if guessed and guessed.startswith("image/"):
            mime = guessed
        else:
            mime = f"image/{suffix[1:]}"
        b64 = base64.b64encode(Path(path).read_bytes()).decode()
        attachment = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{b64}"},
        }

    r = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt()}, attachment],
            }
        ],
    )

    # message.content may be None; normalise so the searches below are safe.
    text = r.choices[0].message.content or ""

    # Keep only the outermost brace-delimited span, dropping any prose or
    # code fences around it.
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        # Without this guard the slice silently degenerates to "" when either
        # brace is missing, which is not valid JSON for the caller.
        raise ValueError("Model response did not contain a JSON object")
    return text[start:end + 1]
|
|
|
|
def ui(image_input, pdf_input):
    """Gradio click handler: run extraction on whichever input was supplied.

    Args:
        image_input: Value of the image component; passed to ``extract``
            unchanged when truthy.
        pdf_input: Value of the file component; used only when
            *image_input* is falsy.

    Returns:
        The string returned by ``extract`` for the chosen input, or ``"{}"``
        when neither input is provided. The image takes precedence when both
        are given.
    """
    if image_input:
        return extract(image_input)
    if pdf_input:
        # The file component may deliver a plain path string or an object
        # exposing the path as ``.name``; accept both rather than failing
        # with AttributeError on a str.
        return extract(getattr(pdf_input, "name", pdf_input))
    return "{}"
|
|
|
|
| |
|
|
# Build the page: two upload widgets side by side, a JSON viewer for the
# result, and a button that routes both inputs through ui().
with gr.Blocks() as demo:
    gr.Markdown("# **Logistics OCR Data Extractor (GPT-5.1)**")

    with gr.Row():
        # type="filepath" makes the image component pass a path string to ui().
        img = gr.Image(label="Upload Image", type="filepath")
        # Extension filters need the leading dot; a bare "pdf" is not a
        # recognised extension or file category.
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"])

    out = gr.JSON(label="Extracted JSON")
    btn = gr.Button("Submit")

    btn.click(fn=ui, inputs=[img, pdf], outputs=out)

    # Each example row fills (image, pdf); the sample rows supply only an image.
    gr.Examples(
        examples=[
            ["IMG_0001.jpg", None],
            ["IMG_0002.jpg", None],
        ],
        inputs=[img, pdf],
        label="Sample Images",
    )

# NOTE(review): share=True requests a public share link, so anyone holding
# the link can trigger billed API calls — confirm this is intended.
demo.launch(share=True)
|
|