Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
Diagnostic: print paragraphs of target clauses from a TS DOCX with indices and styles.

Usage:
    python3 map_sections.py <ts_path.docx> "11.1.22.3.2" "14.5.6"
| """ | |
| import sys | |
| from docx import Document | |
def _is_heading_style(style_name):
    """Return True when *style_name* contains "heading", ignoring case."""
    return 'heading' in style_name.lower()


def main():
    """Print the paragraphs that belong to the requested clauses.

    Command-line arguments (read from ``sys.argv``):
        argv[1]   path of the DOCX file to inspect
        argv[2:]  one or more clause numbers to look for

    A section starts at a heading-styled paragraph whose text contains one of
    the requested clause strings, and ends at the next non-empty
    heading-styled paragraph that does not match any clause. Every non-empty
    paragraph in between is printed with its paragraph index and style name.

    Exits with status 1 after printing a usage line when fewer than two
    arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("Usage: map_sections.py <ts.docx> <clause1> [clause2 ...]")
        sys.exit(1)

    ts_path = sys.argv[1]
    clauses = sys.argv[2:]
    doc = Document(ts_path)

    in_section = None
    for i, para in enumerate(doc.paragraphs):
        text = para.text.strip()
        # A style may carry no name; fall back to '' so the substring tests
        # below cannot raise TypeError on None.
        style = para.style.name or ''
        # One case-insensitive test decides both where a section starts and
        # where it ends, so the two checks can no longer disagree.
        is_heading = _is_heading_style(style)

        # NOTE: clause matching is a plain substring test, so "14.5.6" also
        # matches a "14.5.6.1" heading (sub-clauses re-open the section).
        matched_clause = None
        if is_heading:
            matched_clause = next((c for c in clauses if c in text), None)

        if matched_clause is not None:
            in_section = matched_clause
            print(f'\n=== [{i}] SECTION {matched_clause} | style={style!r} ===')
            print(f' [{i}] style={style!r:16s} | "{text}"')
            continue

        if not in_section or not text:
            # Outside any requested section, or nothing to show.
            continue

        if is_heading:
            print(f' --- section ends at [{i}] style={style!r}: "{text[:60]}"')
            in_section = None
        else:
            print(f' [{i}] style={style!r:16s} | "{text[:100]}"')
# Run the diagnostic only when this file is executed as a script.
if __name__ == '__main__':
    main()