| | """ |
| | PaddleOCR Engine |
| | |
| | High-accuracy OCR using PaddleOCR. |
| | Supports detection, recognition, and angle classification. |
| | """ |
| |
|
| | import time |
| | from typing import List, Optional, Tuple |
| | import numpy as np |
| | from loguru import logger |
| |
|
| | from .base import OCREngine, OCRConfig, OCRResult |
| | from ..schemas.core import BoundingBox, OCRRegion |
| |
|
| | |
# PaddleOCR is an optional dependency: record whether it is importable so the
# engine can fail with a clear error at initialization time instead of at
# module import time.
try:
    from paddleocr import PaddleOCR
except ImportError:
    HAS_PADDLEOCR = False
    logger.warning(
        "PaddleOCR not installed. Install with: "
        "pip install paddleocr paddlepaddle-gpu (or paddlepaddle for CPU)"
    )
else:
    HAS_PADDLEOCR = True
| |
|
| |
|
class PaddleOCREngine(OCREngine):
    """
    OCR engine using PaddleOCR.

    Features:
    - High accuracy text detection and recognition
    - Multi-language support
    - GPU acceleration
    - Angle classification for rotated text

    The underlying PaddleOCR model is created lazily: ``recognize`` calls
    ``initialize`` on first use.
    """

    # Language code accepted in ``config.languages`` -> value passed to
    # PaddleOCR as ``lang``. Codes missing from this table fall back to "en".
    LANGUAGE_MAP = {
        "en": "en",
        "ch": "ch",
        "chinese_cht": "chinese_cht",
        "fr": "french",
        "german": "german",
        "es": "es",
        "it": "it",
        "pt": "pt",
        "ru": "ru",
        "japan": "japan",
        "korean": "korean",
        "ar": "ar",
        "hi": "hi",
        "latin": "latin",
    }

    def __init__(self, config: Optional[OCRConfig] = None):
        """Create the engine without loading any model.

        Args:
            config: Engine configuration, forwarded unchanged to the base class.
        """
        super().__init__(config)
        # Set by initialize(); None until then. The annotation is quoted so it
        # never depends on the optional ``paddleocr`` import having succeeded.
        self._ocr: Optional["PaddleOCR"] = None

    def initialize(self):
        """Load the PaddleOCR model; a no-op if already initialized.

        Only the first entry of ``config.languages`` is used. A code that is
        not a key of ``LANGUAGE_MAP`` falls back to English; both situations
        are logged as warnings so the substitution is visible.

        Raises:
            RuntimeError: If the ``paddleocr`` package is not installed.
            Exception: Any error raised while constructing ``PaddleOCR`` is
                logged and re-raised unchanged.
        """
        if not HAS_PADDLEOCR:
            raise RuntimeError(
                "PaddleOCR not installed. Install with: "
                "pip install paddleocr paddlepaddle-gpu"
            )

        if self._initialized:
            return

        logger.info("Initializing PaddleOCR engine...")

        languages = self.config.languages
        lang = languages[0] if languages else "en"
        if languages and len(languages) > 1:
            logger.warning(
                f"PaddleOCR engine uses a single language; using '{lang}' and "
                f"ignoring {list(languages[1:])}"
            )

        paddle_lang = self.LANGUAGE_MAP.get(lang)
        if paddle_lang is None:
            logger.warning(
                f"Language '{lang}' is not supported by the PaddleOCR engine; "
                "falling back to 'en'"
            )
            paddle_lang = "en"

        try:
            self._ocr = PaddleOCR(
                use_angle_cls=self.config.use_angle_cls,
                lang=paddle_lang,
                use_gpu=self.config.use_gpu,
                # NOTE(review): hard-coded; presumably the GPU memory budget
                # in MB expected by PaddleOCR - confirm before tuning.
                gpu_mem=500,
                det_db_thresh=self.config.det_db_thresh,
                det_db_box_thresh=self.config.det_db_box_thresh,
                rec_batch_num=self.config.rec_batch_num,
                drop_score=self.config.drop_score,
                show_log=False,
            )
            self._initialized = True
            logger.info(f"PaddleOCR initialized (lang={paddle_lang}, gpu={self.config.use_gpu})")

        except Exception as e:
            logger.error(f"Failed to initialize PaddleOCR: {e}")
            raise

    def recognize(
        self,
        image: np.ndarray,
        page_number: int = 0,
    ) -> OCRResult:
        """
        Perform OCR on an image using PaddleOCR.

        Detections whose confidence is below ``config.min_confidence`` are
        dropped. Errors raised during recognition are caught, logged with
        their traceback, and reported through a result with ``success=False``.
        Errors raised by the lazy ``initialize`` call are not caught here.

        Args:
            image: Image as numpy array (RGB, HWC format)
            page_number: Page number for multi-page documents

        Returns:
            OCRResult with recognized text and regions
        """
        if not self._initialized:
            self.initialize()

        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments happening while the model runs.
        start_time = time.perf_counter()

        try:
            results = self._ocr.ocr(image, cls=self.config.use_angle_cls)

            regions = []
            all_texts = []
            total_confidence = 0.0

            # Only the first entry of the result list is read; it may be
            # None/empty when nothing was detected.
            page_lines = results[0] if results else None

            for idx, line in enumerate(page_lines or []):
                if line is None:
                    continue

                box_points = line[0]
                text, confidence = line[1]

                # Normalise once so the region and the running total both use
                # a plain Python float, whatever numeric type the model emits.
                score = float(confidence)
                if score < self.config.min_confidence:
                    continue

                region = OCRRegion(
                    text=text,
                    confidence=score,
                    bbox=self._polygon_to_bbox(box_points, image.shape[:2]),
                    polygon=[(float(p[0]), float(p[1])) for p in box_points],
                    page=page_number,
                    # Index within the raw output, so ids keep their gaps when
                    # low-confidence lines are skipped.
                    line_id=idx,
                    engine="paddleocr",
                )
                regions.append(region)
                all_texts.append(text)
                total_confidence += score

            processing_time = (time.perf_counter() - start_time) * 1000

            return OCRResult(
                regions=regions,
                full_text="\n".join(all_texts),
                confidence_avg=total_confidence / len(regions) if regions else 0.0,
                processing_time_ms=processing_time,
                engine="paddleocr",
                success=True,
            )

        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception(f"PaddleOCR recognition failed: {e}")
            return OCRResult(
                regions=[],
                full_text="",
                confidence_avg=0.0,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
                engine="paddleocr",
                success=False,
                error=str(e),
            )

    def _polygon_to_bbox(
        self,
        points: List[List[float]],
        image_shape: Tuple[int, int],
    ) -> BoundingBox:
        """Convert polygon points to an axis-aligned box clamped to the image.

        Args:
            points: Polygon vertices as ``[x, y]`` pairs.
            image_shape: ``(height, width)`` of the image.

        Returns:
            Non-normalized BoundingBox in pixel coordinates. Every edge is
            clamped into ``[0, width]`` / ``[0, height]``, so the minimum
            never exceeds the maximum even for a polygon outside the image.

        Raises:
            ValueError: If ``points`` is empty.
        """
        if len(points) == 0:
            raise ValueError("Cannot build a bounding box from an empty polygon")

        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]

        height, width = image_shape

        return BoundingBox(
            x_min=min(width, max(0, min(x_coords))),
            y_min=min(height, max(0, min(y_coords))),
            x_max=max(0, min(width, max(x_coords))),
            y_max=max(0, min(height, max(y_coords))),
            normalized=False,
            page_width=width,
            page_height=height,
        )

    def get_supported_languages(self) -> List[str]:
        """Return list of supported language codes (the keys of LANGUAGE_MAP)."""
        return list(self.LANGUAGE_MAP)

    def recognize_with_structure(
        self,
        image: np.ndarray,
        page_number: int = 0,
    ) -> Tuple[OCRResult, Optional[dict]]:
        """
        Perform OCR and return it together with structure information.

        Structure analysis (tables, layout) is not implemented: the second
        element of the returned tuple is currently always ``None``.

        Args:
            image: Image as numpy array
            page_number: Page number

        Returns:
            Tuple of (OCRResult, structure_info)
        """
        ocr_result = self.recognize(image, page_number)

        # Placeholder until structure analysis is implemented.
        structure_info = None

        return ocr_result, structure_info
| |
|