| """ |
| Cross-reference checker. |
| |
| Validates that: |
| - All figures and tables are referenced in text |
| - All labels have corresponding references |
| - Appendix sections are referenced in main text |
| """ |
| import re |
| from typing import List, Set, Tuple |
|
|
| from .base import BaseChecker, CheckResult, CheckSeverity |
|
|
|
|
class ReferenceChecker(BaseChecker):
    """Check cross-reference integrity in a LaTeX document.

    Three kinds of problems are reported:

    * labels that are never referenced,
    * references to labels that are never defined,
    * section-like labels defined after the appendix start that are not
      referenced from the text preceding it.

    The helpers ``_is_commented``, ``_find_line_number``,
    ``_get_line_content`` and ``_create_result`` are not defined in this
    class; they are expected to be supplied by the base class.
    """

    name = "reference"
    display_name = "Cross-References"
    description = "Verify all figures, tables, and sections are properly referenced"

    # Captures the label name inside \label{...}.
    LABEL_PATTERN = re.compile(r'\\label\{([^}]+)\}')

    # Referencing commands that are recognised. Group 1 holds the argument,
    # which may be a comma-separated list of label names.
    REF_PATTERNS = [
        re.compile(r'\\ref\{([^}]+)\}'),
        re.compile(r'\\autoref\{([^}]+)\}'),
        re.compile(r'\\cref\{([^}]+)\}'),
        re.compile(r'\\Cref\{([^}]+)\}'),
        re.compile(r'\\eqref\{([^}]+)\}'),
        re.compile(r'\\pageref\{([^}]+)\}'),
        re.compile(r'\\nameref\{([^}]+)\}'),
        re.compile(r'\\Sref\{([^}]+)\}'),
    ]

    # Marks where the appendix begins.
    APPENDIX_START_PATTERN = re.compile(r'\\appendix\b|\\begin\{appendix\}')
    SECTION_PATTERN = re.compile(r'\\section\*?\{([^}]+)\}')

    # Lines that define a macro; reference commands on such lines belong to
    # the macro body and are not real references.
    _COMMAND_DEFINITION_PATTERN = re.compile(r'\\(new|renew|provide)command')

    # Label prefix (text before the first ':') -> human-readable element type.
    _LABEL_TYPES = {
        'fig': 'figure',
        'tab': 'table',
        'sec': 'section',
        'eq': 'equation',
        'alg': 'algorithm',
        'lst': 'listing',
        'app': 'appendix',
    }

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Run all cross-reference checks on ``tex_content``.

        Args:
            tex_content: Full LaTeX source to analyse.
            config: Accepted for interface compatibility; not read here.

        Returns:
            One failed result per unreferenced label, per reference to an
            undefined label, and per appendix section label that the main
            text does not reference.
        """
        results = []

        labels = self._extract_labels(tex_content)
        references = self._extract_references(tex_content)

        # Labels that nothing points to.
        for label, (line_num, line_content) in labels.items():
            if label in references:
                continue
            label_type = self._get_label_type(label)
            results.append(self._create_result(
                passed=False,
                severity=self._get_severity_for_label(label),
                message=f"Unreferenced {label_type}: '{label}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\ref{{{label}}} or \\autoref{{{label}}} where appropriate"
            ))

        # References that point to nothing.
        for ref, (line_num, line_content) in references.items():
            if ref in labels:
                continue
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.ERROR,
                message=f"Reference to undefined label: '{ref}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\label{{{ref}}} to the target element or fix the reference"
            ))

        results.extend(
            self._check_appendix_references(tex_content, labels, references)
        )
        return results

    def _extract_labels(self, content: str) -> dict:
        """Map each non-commented label name to ``(line_number, line_text)``.

        When a label name is defined more than once, the last definition's
        location is the one kept.
        """
        labels = {}
        for match in self.LABEL_PATTERN.finditer(content):
            if self._is_commented(content, match.start()):
                continue
            line_num = self._find_line_number(content, match.start())
            labels[match.group(1)] = (
                line_num,
                self._get_line_content(content, line_num),
            )
        return labels

    def _extract_references(self, content: str) -> dict:
        """Map each referenced label name to where it is first seen.

        Patterns are scanned in ``REF_PATTERNS`` order, so "first" means the
        first hit of the earliest pattern that matches the name. Macro
        parameters such as ``#1`` and references on macro-definition lines
        are ignored.

        Returns:
            ``{label_name: (line_number, line_text)}``.
        """
        references = {}
        for match in self._iter_reference_matches(content):
            new_refs = [
                ref
                for ref in self._split_reference_names(match.group(1))
                if ref not in references and not self._is_macro_parameter(ref)
            ]
            if not new_refs:
                continue

            # The location is shared by every name in this command, so it is
            # resolved once per match rather than once per name.
            line_num = self._find_line_number(content, match.start())
            line_content = self._get_line_content(content, line_num)
            if self._COMMAND_DEFINITION_PATTERN.search(line_content):
                continue

            for ref in new_refs:
                references[ref] = (line_num, line_content)
        return references

    def _iter_reference_matches(self, content: str):
        """Yield every non-commented reference-command match in ``content``."""
        for pattern in self.REF_PATTERNS:
            for match in pattern.finditer(content):
                if not self._is_commented(content, match.start()):
                    yield match

    @staticmethod
    def _split_reference_names(refs_str: str) -> List[str]:
        """Split a reference argument like ``"a, b"`` into ``["a", "b"]``."""
        return [name.strip() for name in refs_str.split(',') if name.strip()]

    @staticmethod
    def _is_macro_parameter(ref: str) -> bool:
        """Return True for macro placeholders of the form ``#`` + one digit."""
        return ref.startswith('#') and len(ref) == 2 and ref[1].isdigit()

    def _get_label_type(self, label: str) -> str:
        """Return the element type implied by the label's prefix.

        The prefix is the text before the first ``:``, compared
        case-insensitively. Labels without a colon, or with an unknown
        prefix, yield the generic ``'label'``.
        """
        prefix, colon, _ = label.partition(':')
        if not colon:
            return 'label'
        return self._LABEL_TYPES.get(prefix.lower(), 'label')

    def _get_severity_for_label(self, label: str) -> CheckSeverity:
        """Return the severity for an unreferenced label.

        Unreferenced equations are only informational; every other label
        type produces a warning.
        """
        if self._get_label_type(label) == 'equation':
            return CheckSeverity.INFO
        return CheckSeverity.WARNING

    @staticmethod
    def _is_section_label(label: str) -> bool:
        """Heuristically decide whether ``label`` names a section/appendix.

        Only the prefix (text before the first ``:``; the whole label when
        there is no colon) is inspected, so ``subsec:x`` and ``appendix:x``
        qualify while ``fig:intersection`` or ``eq:approx`` do not.
        """
        prefix = label.partition(':')[0].lower()
        return 'sec' in prefix or 'app' in prefix

    def _find_appendix_start(self, content: str):
        """Return the offset of the first non-commented appendix marker.

        Returns ``None`` when the document has no active appendix marker.
        """
        for match in self.APPENDIX_START_PATTERN.finditer(content):
            if not self._is_commented(content, match.start()):
                return match.start()
        return None

    def _check_appendix_references(
        self,
        content: str,
        labels: dict,
        references: dict
    ) -> List[CheckResult]:
        """Check that appendix sections are referenced in the main text.

        Args:
            content: Full LaTeX source.
            labels: Unused; kept so the signature stays stable.
            references: Unused; kept so the signature stays stable.

        Returns:
            One warning per section-like label located after the appendix
            start whose exact name is not referenced before it. Empty when
            the document has no appendix.
        """
        appendix_start = self._find_appendix_start(content)
        if appendix_start is None:
            return []

        # Exact names referenced from the main text. A substring test would
        # let \ref{sec:appendix_b} count as a reference to 'sec:app'.
        main_content = content[:appendix_start]
        main_refs = {
            ref_name
            for match in self._iter_reference_matches(main_content)
            for ref_name in self._split_reference_names(match.group(1))
        }

        results = []
        # Scan the full document from the appendix offset so that comment
        # detection and line numbers use real document positions.
        for match in self.LABEL_PATTERN.finditer(content, appendix_start):
            if self._is_commented(content, match.start()):
                continue

            label = match.group(1)
            if not self._is_section_label(label) or label in main_refs:
                continue

            line_num = self._find_line_number(content, match.start())
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.WARNING,
                message=f"Appendix section '{label}' is not referenced in main text",
                line_number=line_num,
                line_content=self._get_line_content(content, line_num),
                suggestion="Add a reference to this appendix section in the main text"
            ))

        return results
|
|