#!/usr/bin/env python3
"""
Diagnostic: print paragraphs of target clauses from a TS DOCX with indices and styles.

Usage:
    python3 map_sections.py <ts.docx> "11.1.22.3.2" "14.5.6"
"""
import re
import sys
from typing import Iterable, Iterator, Optional, Sequence, Tuple

try:
    from docx import Document
except ImportError:
    # Keep the module importable without python-docx; main() reports the
    # missing dependency with a clear message instead of a traceback.
    Document = None


def _is_heading(style_name: Optional[str]) -> bool:
    """Return True when *style_name* names a heading style (case-insensitive)."""
    return 'heading' in (style_name or '').lower()


def _clause_pattern(clause: str) -> 're.Pattern':
    """Compile a regex that finds *clause* as a whole clause number.

    The clause must not be preceded by a word character or a dot (so
    "14.5.6" does not match inside "114.5.6" or "3.14.5.6") and must not be
    followed by a word character (so it does not match "14.5.60").  A
    following dot is allowed, which keeps sub-clause headings such as
    "14.5.6.1" matching their parent clause.
    """
    return re.compile(r'(?<![\w.])' + re.escape(clause) + r'(?!\w)')


def iter_report_lines(
    paragraphs: Iterable[Tuple[str, Optional[str]]],
    clauses: Sequence[str],
) -> Iterator[str]:
    """Yield the report lines for the requested clauses.

    Args:
        paragraphs: ``(text, style_name)`` pairs in document order.  The
            position of each pair is the paragraph index shown in the output.
        clauses: Clause numbers to look for in heading paragraphs.  Empty
            strings are ignored.

    Yields:
        One string per output line: a section banner and the heading line
        when a heading matches a clause, every non-empty paragraph while
        inside a matched section, and a "section ends" marker at the first
        non-matching, non-empty heading that follows.
    """
    # Compile once; the patterns are loop-invariant.
    patterns = [(clause, _clause_pattern(clause)) for clause in clauses if clause]
    in_section = None

    for i, (raw_text, style) in enumerate(paragraphs):
        text = raw_text.strip()
        heading = _is_heading(style)

        # First clause that matches wins, mirroring the command-line order.
        matched_clause = None
        if heading:
            for clause, pattern in patterns:
                if pattern.search(text):
                    matched_clause = clause
                    break

        if matched_clause is not None:
            in_section = matched_clause
            yield f'\n=== [{i}] SECTION {matched_clause} | style={style!r} ==='
            yield f' [{i}] style={style!r:16s} | "{text}"'
            continue

        if in_section is None or not text:
            continue

        if heading:
            # Any other non-empty heading closes the current section.
            yield f' --- section ends at [{i}] style={style!r}: "{text[:60]}"'
            in_section = None
        else:
            yield f' [{i}] style={style!r:16s} | "{text[:100]}"'


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Print the paragraphs of the requested clauses from a DOCX file.

    Args:
        argv: Command-line arguments without the program name: the DOCX path
            followed by one or more clause numbers.  Defaults to
            ``sys.argv[1:]``.

    Exits with status 1 when fewer than two arguments are given or when
    python-docx is not installed.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print("Usage: map_sections.py <ts.docx> <clause1> [clause2 ...]")
        sys.exit(1)

    if Document is None:
        sys.exit("map_sections.py: python-docx is required (pip install python-docx)")

    ts_path = args[0]
    clauses = args[1:]

    doc = Document(ts_path)
    paragraphs = ((para.text, para.style.name) for para in doc.paragraphs)
    for line in iter_report_lines(paragraphs, clauses):
        print(line)


if __name__ == '__main__':
    main()