Spaces:

docparser
/

Text_Captcha_breaker

Paused

App Files Files Community

Text_Captcha_breaker / app.py

docparser

Update app.py

9606b04 verified over 1 year ago

raw

history blame contribute delete

2.1 kB

	import torch
	import onnx
	import onnxruntime as rt
	from torchvision import transforms as T
	from PIL import Image
	from tokenizer_base import Tokenizer
	import pathlib
	import os
	import gradio as gr
	from huggingface_hub import Repository



	# Path of the ONNX model file that is passed to initialize_model().
	model_file = "captcha.onnx"
	# Size handed to T.Resize in get_transform(); torchvision reads a 2-sequence as (height, width).
	img_size = (32,128)
	# Characters handed to the Tokenizer.
	# NOTE(review): this is a raw string, so the backslashes in `\"`, `\\` and `\|` are kept as
	# literal characters and '\' therefore appears in the charset more than once. Confirm this
	# matches the character set the model was exported with before changing anything here.
	charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~"
	# Tokenizer instance whose decode() is used by get_text().
	tokenizer_base = Tokenizer(charset)

	def get_transform(img_size):
	    """Build the preprocessing pipeline applied to an image before inference.

	    The pipeline resizes to ``img_size`` with bicubic interpolation, converts
	    the image to a tensor and normalizes it with mean 0.5 and std 0.5.
	    """
	    return T.Compose([
	        T.Resize(img_size, T.InterpolationMode.BICUBIC),
	        T.ToTensor(),
	        T.Normalize(0.5, 0.5),
	    ])

	def to_numpy(tensor):
	    """Return ``tensor`` as a NumPy array on the CPU.

	    A tensor that requires grad is detached first.
	    """
	    if tensor.requires_grad:
	        tensor = tensor.detach()
	    return tensor.cpu().numpy()

	def initialize_model(model_file):
	    """Validate the ONNX model and open an inference session for it.

	    Returns a ``(transform, ort_session)`` pair: the preprocessing pipeline
	    built by ``get_transform`` for the module-level ``img_size``, and an
	    ONNX Runtime ``InferenceSession`` created from ``model_file``.
	    """
	    # Run the ONNX checker over the loaded model before creating the session.
	    onnx.checker.check_model(onnx.load(model_file))
	    session = rt.InferenceSession(model_file)
	    preprocess = get_transform(img_size)
	    return preprocess, session

	def get_text(img_org):
	    """Read the text shown in a captcha image.

	    Args:
	        img_org: PIL image to read, or ``None`` when no image was supplied.

	    Returns:
	        The first decoded prediction, or an empty string when ``img_org``
	        is ``None``.
	    """
	    # The Gradio image input can hand over None when the form is submitted
	    # without an image; answer with empty text instead of failing on .convert().
	    if img_org is None:
	        return ""

	    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
	    batch = transform(img_org.convert('RGB')).unsqueeze(0)

	    # Compute the ONNX Runtime output prediction.
	    input_name = ort_session.get_inputs()[0].name
	    logits = ort_session.run(None, {input_name: to_numpy(batch)})[0]

	    probs = torch.tensor(logits).softmax(-1)
	    # decode() returns two values; only the first one is used here.
	    texts, _ = tokenizer_base.decode(probs)
	    text = texts[0]
	    print(text)
	    return text

	# Load the model once at start-up; get_text() reads these module-level names.
	transform, ort_session = initialize_model(model_file=model_file)

	# Sample images offered as examples in the UI.
	example_images = [
	    "8000.png", "11JW29.png", "2a8486.jpg", "2nbcx.png",
	    "000679.png", "000HU.png", "00Uga.png.jpg", "00bAQwhAZU.jpg",
	    "00h57kYf.jpg", "0EoHdtVb.png", "0JS21.png", "0p98z.png", "10010.png",
	]

	demo = gr.Interface(
	    fn=get_text,
	    inputs=gr.Image(type="pil"),
	    outputs=gr.Textbox(),
	    title="Text Captcha Reader",
	    examples=example_images,
	)
	demo.launch()

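	# Example of calling get_text() directly instead of through the Gradio UI
	# (get_text takes a PIL image and returns the decoded text):
	# if __name__ == "__main__":
	#     image = Image.open("8000.png")
	#     print(get_text(image))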