ds4sd-CodeFormula-onnx / CodeFormula.yaml

mac

Initial release: Docling CodeFormula ONNX models with JPQD quantization

41bd4f5 7 months ago

2.54 kB

	name: CodeFormula_jpqd
	description: CodeFormula vision-language model for code and formula recognition, optimized with JPQD quantization
	framework: ONNX
	task: image-to-text
	domain: multimodal
	subdomain: vision-language

	model_info:
	architecture: Vision-Language Transformer
	paper: "Docling Technical Report"
	paper_url: "https://arxiv.org/abs/2408.09869"
	original_source: DS4SD CodeFormula
	original_repo: "https://huggingface.co/ds4sd/CodeFormula"
	optimization: JPQD quantization

	specifications:
	input_shape: [1, 10]
	input_type: int64
	input_format: Token sequences
	output_shape: [1, 10, 50827]
	output_type: float32
	vocabulary_size: 50827
	sequence_length: 10
	batch_size: dynamic

	performance:
	original_size_gb: "~2+" # Estimated original size
	optimized_size_mb: 526.19
	compression_ratio: "~4x"
	inference_time_cpu_ms: 6.6
	throughput_fps: ~150
	accuracy_retention: ">95%"

	deployment:
	runtime: onnxruntime
	hardware: CPU-optimized
	precision: INT8 weights, FP32 activations
	memory_usage_gb: ~1

	usage:
	preprocessing:
	- Load image at 120 DPI resolution
	- Resize and enhance image quality
	- Convert to token sequence input
	postprocessing:
	- Decode logits to token IDs
	- Convert tokens to text
	- Apply language-specific formatting

	capabilities:
	code_recognition:
	- Multi-language programming code
	- Indentation preservation
	- Syntax highlighting support
	- Output format: "<_language_> code_content"
	formula_recognition:
	- Mathematical expressions
	- Scientific notation
	- Chemical formulas
	- Output format: LaTeX code

	supported_languages:
	programming:
	- Python
	- Java
	- JavaScript
	- C/C++
	- Go
	- Rust
	- And many more
	markup:
	- LaTeX (mathematical formulas)
	- Chemical notation
	- Scientific expressions

	applications:
	- Document digitization
	- Educational content processing
	- Code plagiarism detection
	- Mathematical problem solving
	- Technical documentation conversion
	- Research paper processing

	benchmarks:
	accuracy: ">95% code recognition accuracy"
	speed: "150 FPS on modern CPUs"
	memory: "Efficient 1GB memory usage"

	training_data:
	type: "Code snippets and mathematical formulas"
	resolution: "120 DPI images"
	diversity: "Multiple programming languages and notation systems"

	license: mit
	tags:
	- code-recognition
	- formula-recognition
	- vision-language
	- multimodal
	- ocr
	- latex
	- onnx
	- quantized
	- jpqd
	- programming-languages