ApplyCRs / scripts /map_sections.py
heymenn's picture
init
7eedaf8
#!/usr/bin/env python3
"""
Diagnostic: print paragraphs of target clauses from a TS DOCX with indices and styles.
Usage:
python3 map_sections.py <ts_path.docx> "11.1.22.3.2" "14.5.6"
"""
import sys
from docx import Document
def _is_heading(style_name):
    """Return True when *style_name* contains "heading", ignoring case."""
    return 'heading' in style_name.lower()


def main():
    """Print the paragraphs belonging to the requested clauses of a DOCX file.

    The document path is read from ``sys.argv[1]`` and one or more clause
    strings from ``sys.argv[2:]``. Paragraphs are visited in document order:

    * a heading paragraph whose text contains one of the clauses opens a
      section and prints a banner plus the heading itself;
    * while a section is open, every non-empty paragraph is printed with its
      index and style name (text truncated to 100 characters);
    * the next non-empty heading that matches no clause closes the section.

    Prints a usage message and exits with status 1 when fewer than two
    command-line arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("Usage: map_sections.py <ts.docx> <clause1> [clause2 ...]",
              file=sys.stderr)
        sys.exit(1)

    ts_path, clauses = sys.argv[1], sys.argv[2:]
    doc = Document(ts_path)

    in_section = None
    for i, para in enumerate(doc.paragraphs):
        text = para.text.strip()
        style = para.style.name
        heading = _is_heading(style)

        # Clause lookup is a plain substring test against the heading text, so
        # "14.5.6" also matches a "14.5.6.1 ..." heading; such sub-headings
        # re-open the section instead of closing it.
        # NOTE(review): it equally matches "14.5.60" -- confirm whether a
        # stricter clause-number comparison is wanted.
        clause = None
        if heading:
            clause = next((c for c in clauses if c in text), None)

        if clause is not None:
            in_section = clause
            print(f'\n=== [{i}] SECTION {clause} | style={style!r} ===')
            print(f' [{i}] style={style!r:16s} | "{text}"')
            continue

        if not in_section:
            continue

        # Section boundaries use the same case-insensitive heading test as
        # section starts, so any style that can open a section can also end it.
        if heading and text:
            print(f' --- section ends at [{i}] style={style!r}: "{text[:60]}"')
            in_section = None
        elif text:
            print(f' [{i}] style={style!r:16s} | "{text[:100]}"')
# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    main()