Spaces:
Sleeping
Sleeping
File size: 1,290 Bytes
#!/usr/bin/env python3
"""
Diagnostic: print paragraphs of target clauses from a TS DOCX with indices and styles.
Usage:
python3 map_sections.py <ts_path.docx> "11.1.22.3.2" "14.5.6"
"""
import sys
from docx import Document
def _is_heading_style(style_name):
    """Return True when *style_name* contains 'heading', ignoring case."""
    return 'heading' in style_name.lower()


def main():
    """Print the paragraphs that follow each requested clause heading.

    Reads the DOCX path from ``sys.argv[1]`` and one or more clause strings
    from ``sys.argv[2:]``. A section starts at a heading-styled paragraph
    whose text contains one of the clauses and ends at the next non-empty
    heading-styled paragraph. Every non-empty paragraph in between is
    printed with its paragraph index and style name.

    Exits with status 1 after printing a usage line when fewer than two
    arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("Usage: map_sections.py <ts.docx> <clause1> [clause2 ...]")
        sys.exit(1)

    ts_path = sys.argv[1]
    clauses = sys.argv[2:]
    doc = Document(ts_path)

    in_section = None
    for i, para in enumerate(doc.paragraphs):
        text = para.text.strip()
        style = para.style.name
        # One predicate for both "section starts" and "section ends", so a
        # style that can open a section can also close one.
        is_heading = _is_heading_style(style)

        matched_clause = None
        if is_heading:
            # First clause (in command-line order) contained in the heading.
            matched_clause = next((c for c in clauses if c in text), None)

        if matched_clause is not None:
            in_section = matched_clause
            print(f'\n=== [{i}] SECTION {matched_clause} | style={style!r} ===')
            print(f' [{i}] style={style!r:16s} | "{text}"')
            continue

        # Outside any section, or an empty paragraph: nothing to report.
        if not in_section or not text:
            continue

        if is_heading:
            # Any other heading closes the section currently being dumped.
            print(f' --- section ends at [{i}] style={style!r}: "{text[:60]}"')
            in_section = None
        else:
            print(f' [{i}] style={style!r:16s} | "{text[:100]}"')
# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|