File size: 1,687 Bytes
638efb3
 
 
 
0175efa
f06d29f
 
 
 
638efb3
 
 
0175efa
 
 
638efb3
 
 
 
 
 
 
0175efa
a170004
3877d00
a170004
 
0175efa
 
a170004
0175efa
 
638efb3
 
01e827e
638efb3
0175efa
638efb3
a170004
f06d29f
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import torch
from PIL import Image
import io
import base64
from transformers import GlmOcrProcessor, GlmOcrForConditionalGeneration
from fastapi import FastAPI, Request

app = FastAPI()


class EndpointHandler:
    """Callable wrapper around a GLM-OCR processor/model pair.

    An instance is called with a request payload carrying a base64-encoded
    image and returns the text generated by the model for a fixed
    receipt-extraction prompt.
    """

    def __init__(self, path=""):
        """Load the processor and the model from *path*.

        Args:
            path: Location handed to both ``from_pretrained`` calls.
        """
        # Native 5.1.0 classes specifically for GLM-OCR
        self.processor = GlmOcrProcessor.from_pretrained(path)
        self.model = GlmOcrForConditionalGeneration.from_pretrained(
            path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )
        self.model.eval()

    @staticmethod
    def _load_image(data):
        """Decode the request payload into an RGB PIL image.

        Args:
            data: Either a mapping with the base64 image under ``"inputs"``
                or the base64 image itself (``str`` or bytes-like).

        Returns:
            The decoded image converted to ``"RGB"`` mode.

        Raises:
            TypeError: If no base64 string/bytes payload can be found.
        """
        # Read without mutating the caller's dict; a bare payload is used
        # as-is, mirroring the original "fall back to data" intent.
        payload = data.get("inputs", data) if isinstance(data, dict) else data
        if not isinstance(payload, (str, bytes, bytearray)):
            raise TypeError(
                "expected a base64-encoded image under 'inputs', got "
                f"{type(payload).__name__}"
            )

        if isinstance(payload, str) and payload.startswith("data:"):
            # Tolerate data URIs ("data:image/png;base64,<payload>"); plain
            # base64 never contains ':' or ',', so this cannot misfire.
            _, comma, encoded = payload.partition(",")
            if comma:
                payload = encoded

        image_bytes = base64.b64decode(payload)
        return Image.open(io.BytesIO(image_bytes)).convert("RGB")

    def __call__(self, data):
        """Run the receipt-extraction prompt against the supplied image.

        Args:
            data: Request payload; see ``_load_image`` for accepted shapes.
                The original comment says the ``"inputs"`` field is sent by
                Google Sheets.

        Returns:
            A one-element list ``[{"generated_text": <decoded text>}]``.

        Raises:
            TypeError: If the payload does not contain a base64 image.
        """
        image = self._load_image(data)

        # Bookkeeping prompt - Native formatting
        prompt = "Extract receipt items into JSON: [{date, vendor, description, qty, price, total}]"

        # New 5.1.0 process workflow
        inputs = self.processor(images=image, text=prompt, return_tensors="pt").to(self.model.device)

        # Gradients are never needed for generation.
        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, max_new_tokens=1024)

        # Decode results
        # NOTE(review): depending on the model, generated_ids may also contain
        # the prompt tokens, in which case the decoded text starts with the
        # prompt -- confirm whether they should be sliced off before decoding.
        result = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return [{"generated_text": result}]


# Created after the class definition: the name EndpointHandler must already
# be bound when this line executes, otherwise importing the module raises
# NameError.
handler = EndpointHandler("/repository")


@app.post("/")
@app.post("/v1/chat/completions")
async def handle(request: Request):
    """Parse the JSON request body and return the module-level handler's result.

    Registered for POST on both "/" and "/v1/chat/completions".
    """
    payload = await request.json()
    response = handler(payload)
    return response

@app.get("/health")
async def health():
    """Return a constant status payload for GET /health."""
    status_report = {"status": "ok"}
    return status_report