#!/usr/bin/env python3
"""
finalize_ts.py — Add tracked-change metadata updates to a TS DOCX after CR application.

Three edits are made (all as tracked changes):
  1. New row in the Change History table (second-to-last table, Annex V)
  2. New row in the History table (last table, last page)
  3. Version + date update in the first paragraph (title)

Usage:
    python3 finalize_ts.py TS_DOCX CR_DOCX [--author "Name"] [--output PATH]
"""

import argparse
import re
import sys
from datetime import date, timedelta
from pathlib import Path

import docx

sys.path.insert(0, str(Path(__file__).parent))
from docx_helpers import (
    RevCounter,
    tracked_insert_table_row,
    tracked_modify_para_multi,
    AUTHOR,
    DATE,
)


# ── Path helpers ──────────────────────────────────────────────────────────────

# A Windows drive prefix: one letter, a colon, then either path separator.
_WIN_DRIVE_RE = re.compile(r'^[A-Za-z]:[\\/]')


def to_wsl_path(p: str) -> str:
    """Convert Windows paths (C:\\...) to WSL paths (/mnt/c/...).

    Any drive letter is accepted, with either a backslash or a forward slash
    after the colon. Paths that do not start with a drive prefix are returned
    unchanged.
    """
    if not _WIN_DRIVE_RE.match(p):
        return p
    drive = p[0].lower()
    rest = p[2:].replace('\\', '/')
    return f'/mnt/{drive}{rest}'


# ── Date / version helpers ────────────────────────────────────────────────────

def compute_pub_date(today=None):
    """
    Return (yyyy-mm, "Month YYYY") using the 5-day rule: if today is within
    5 days of the next month's first day, use next month; otherwise use the
    current month.

    Args:
        today: optional ``datetime.date`` to evaluate the rule for; defaults
            to ``date.today()``.
    """
    if today is None:
        today = date.today()
    month_start = today.replace(day=1)
    # Day 1 + 32 days always lands in the following month; snap back to its 1st.
    first_next = (month_start + timedelta(days=32)).replace(day=1)
    days_until = (first_next - today).days
    target = first_next if days_until <= 5 else month_start
    return target.strftime('%Y-%m'), target.strftime('%B %Y')


def derive_new_version(v: str) -> str:
    """Increment middle component of X.Y.Z → X.(Y+1).0.

    Raises:
        ValueError: if *v* has fewer than three dot-separated components or
            its middle component is not an integer (for example when the
            current version could not be extracted from the CR).
    """
    parts = v.split('.')
    try:
        parts[1] = str(int(parts[1]) + 1)
        parts[2] = '0'
    except (IndexError, ValueError) as err:
        raise ValueError(
            f'Cannot derive new version from {v!r}: expected the form X.Y.Z'
        ) from err
    return '.'.join(parts)


# ── CR metadata extraction ────────────────────────────────────────────────────

# UID pattern like SET(26)000019r1 or similar.
_UID_RE = re.compile(r'[A-Z]+\(\d+\)\d+\S*')

# Cover-page label prefix → metadata key. Order matters: the first label that
# prefixes a cell wins for that cell.
_LABEL_MAP = {
    'CR': 'cr_num',
    'Rev': 'rev',
    'Curr. vers': 'current_version',
    'Current version': 'current_version',
    'Cat': 'cat',
    'Category': 'cat',
}


def _parse_meeting_id(cells):
    """Return 'BODY-NUMBER' from the first cell mentioning "Meeting #".

    Example: "ETSI TC SET Meeting #121, Edinburgh..." gives "SET-121".
    Falls back to the full cell text when body or number cannot be parsed,
    and to '' when no cell mentions a meeting.
    """
    meeting_text = next(
        (c for c in cells if 'Meeting #' in c or 'Meeting#' in c), ''
    )
    if not meeting_text:
        return ''
    # Body: word before "Meeting" (e.g. "SET").
    body_match = re.search(r'(\w+)\s+Meeting\s*#', meeting_text)
    # Number: digits after "#".
    num_match = re.search(r'Meeting\s*#\s*(\d+)', meeting_text)
    body = body_match.group(1) if body_match else ''
    number = num_match.group(1) if num_match else ''
    return f'{body}-{number}' if body and number else meeting_text


def _parse_cr_cells(cells):
    """Build the CR metadata dict from the flat list of cover-table cell texts."""
    meta = {
        'meeting_id': _parse_meeting_id(cells),
        'uid': '',
        'cr_num': '',
        'rev': '',
        'cat': '',
        'title': '',
        'current_version': '',
    }

    # --- UID: first cell containing something shaped like SET(26)000019r1 ---
    for c in cells:
        m = _UID_RE.search(c)
        if m:
            meta['uid'] = m.group(0)
            break

    # --- Label-value scanning ---
    # If a cell matches a label, the next non-empty cell is the value.
    title_next = False
    for i, c in enumerate(cells):
        stripped = c.strip().rstrip(':')

        # Title may span its own cell or be labelled.
        if stripped.lower() in ('title', 'title of change'):
            title_next = True
            continue
        if title_next:
            # Keep waiting across empty cells until the title text shows up.
            if c.strip():
                meta['title'] = c.strip()
                title_next = False
            continue

        for label, key in _LABEL_MAP.items():
            if not stripped.startswith(label):
                continue
            # Value is the first non-empty cell among the next three.
            value = next(
                (v.strip() for v in cells[i + 1:i + 4] if v.strip()), ''
            )
            if value:
                meta[key] = value
            break  # only the first matching label applies to this cell

    return meta


def extract_cr_metadata(cr_docx_path: str) -> dict:
    """
    Open the CR DOCX and read metadata from tables[0] (cover page table).

    Returns dict with keys:
        meeting_id, uid, cr_num, rev, cat, title, current_version
    Keys whose value could not be found are left as ''.

    Raises:
        ValueError: if the CR document contains no tables.
    """
    doc = docx.Document(cr_docx_path)
    if not doc.tables:
        raise ValueError('CR has no tables — cannot extract metadata')

    tbl = doc.tables[0]
    # Collect all cell texts, row by row, for scanning.
    cells = [cell.text.strip() for row in tbl.rows for cell in row.cells]
    return _parse_cr_cells(cells)


# ── Meeting ID format detection ───────────────────────────────────────────────

def _detect_meeting_separator(tbl):
    """
    Scan the meeting column (col index 1) of the Change History table
    bottom-up and return the separator between body letters and number in the
    first non-empty cell where one can be detected, e.g. '#' in 'SET#115' or
    '-' in 'SET-119'.

    Non-empty cells without a recognisable separator are skipped and the scan
    continues upward. Returns '#' when no row yields a separator.
    """
    for row in reversed(tbl.rows):
        cells = row.cells
        if len(cells) <= 1:
            continue
        text = cells[1].text.strip()
        if not text:
            continue
        m = re.search(r'[A-Za-z]([^A-Za-z0-9])\d', text)
        if m:
            return m.group(1)
    return '#'


# ── TS table locators ─────────────────────────────────────────────────────────

def find_change_history_table(ts_doc):
    """Return ts_doc.tables[-2] (Change History / Annex V). Accepts 8 or 9 columns.

    Raises:
        ValueError: if the document has fewer than 2 tables, or the table's
            last row does not have 8 or 9 cells.
    """
    tables = ts_doc.tables
    if len(tables) < 2:
        raise ValueError('TS has fewer than 2 tables')
    tbl = tables[-2]
    ncols = len(tbl.rows[-1].cells)
    if ncols not in (8, 9):
        raise ValueError(
            f'Change History table has {ncols} columns, expected 8 or 9'
        )
    return tbl


def find_history_table(ts_doc):
    """Return ts_doc.tables[-1] (History / last page). Validates 3 columns.

    Raises:
        ValueError: if the document has no tables, or the table's last row
            does not have exactly 3 cells.
    """
    tables = ts_doc.tables
    if not tables:
        raise ValueError('TS has no tables')
    tbl = tables[-1]
    last_row = tbl.rows[-1]
    if len(last_row.cells) != 3:
        raise ValueError(
            f'History table has {len(last_row.cells)} columns, expected 3'
        )
    return tbl


# ── Update functions ──────────────────────────────────────────────────────────

# Strict 'BODY-NUMBER' shape produced by extract_cr_metadata on success.
_BODY_NUMBER_RE = re.compile(r'(\w+)-(\d+)')


def _format_meeting_id(meeting_id, sep):
    """Re-separate a 'BODY-NUMBER' meeting id with *sep*.

    Anything that is not strictly BODY-NUMBER (e.g. the raw meeting text that
    extract_cr_metadata falls back to) is returned untouched, so hyphens in
    free text are never rewritten.
    """
    m = _BODY_NUMBER_RE.fullmatch(meeting_id)
    if m and sep != '-':
        return f'{m.group(1)}{sep}{m.group(2)}'
    return meeting_id


def _change_history_cells(ncols, first_header, pub_yyyy_mm, meeting_id, meta,
                          old_v, new_v):
    """Return the cell texts for a new Change History row of width *ncols*."""
    if ncols == 9:
        # Standard ETSI format:
        # date | meeting | uid | cr | rev | cat | title | old_v | new_v
        return [
            pub_yyyy_mm, meeting_id, meta['uid'], meta['cr_num'],
            meta['rev'], meta['cat'], meta['title'], old_v, new_v,
        ]
    if ncols == 8:
        # The 8-column variant is identified by the first column header.
        if re.search(r'[Dd]ate', first_header):
            # date | meeting | uid | cr | rev | cat | title | new_v (no old_v)
            return [
                pub_yyyy_mm, meeting_id, meta['uid'], meta['cr_num'],
                meta['rev'], meta['cat'], meta['title'], new_v,
            ]
        # meeting | uid | wg_doc | cr | rev | cat | title | new_v
        # (no date, no old_v; wg_doc is left blank)
        return [
            meeting_id, meta['uid'], '', meta['cr_num'],
            meta['rev'], meta['cat'], meta['title'], new_v,
        ]
    # Defensive fallback: find_change_history_table currently rejects any
    # other width, so this only applies if that validation is relaxed.
    return [
        pub_yyyy_mm, meeting_id, meta['uid'], meta['cr_num'],
        meta['rev'], meta['cat'], meta['title'], old_v, new_v,
    ][:ncols]


def update_change_history_table(ts_doc, meta, pub_yyyy_mm, old_v, new_v, rev,
                                author, date_str):
    """Insert a new row into the Change History table via tracked_insert_table_row.

    Args:
        ts_doc: the opened TS document.
        meta: dict as returned by extract_cr_metadata.
        pub_yyyy_mm: publication date as 'YYYY-MM'.
        old_v, new_v: version strings before / after the change.
        rev, author, date_str: passed through to tracked_insert_table_row.

    Returns:
        The list of cell texts that was inserted.
    """
    tbl = find_change_history_table(ts_doc)
    ncols = len(tbl.rows[-1].cells)

    # Detect separator used in existing rows (e.g. '#' in 'SET#115', '-' in
    # 'SET-119') and reformat meeting_id so it matches the existing style.
    sep = _detect_meeting_separator(tbl)
    meeting_id = _format_meeting_id(meta['meeting_id'], sep)

    # The header row is only consulted to tell the 8-column variants apart.
    first_header = ''
    if ncols == 8 and tbl.rows:
        first_header = tbl.rows[0].cells[0].text.strip()

    cell_texts = _change_history_cells(
        ncols, first_header, pub_yyyy_mm, meeting_id, meta, old_v, new_v
    )
    tracked_insert_table_row(tbl, cell_texts, rev, author, date_str)
    return cell_texts


def update_history_table(ts_doc, new_v, pub_month_year, rev, author, date_str):
    """Insert a 'V<new_v> | <Month YYYY> | Publication' row into the History table.

    Returns:
        The list of cell texts that was inserted.
    """
    tbl = find_history_table(ts_doc)
    cell_texts = [f'V{new_v}', pub_month_year, 'Publication']
    tracked_insert_table_row(tbl, cell_texts, rev, author, date_str)
    return cell_texts


def update_title_para(ts_doc, old_v, new_v, old_date_str, new_date_str, rev,
                      author, date_str):
    """
    Update first paragraph: V<old_v>→V<new_v> and (old_date_str)→(new_date_str).
    Both replacements are handed to a single tracked_modify_para_multi call.
    """
    para = ts_doc.paragraphs[0]
    replacements = [
        (f'V{old_v}', f'V{new_v}'),
        (f'({old_date_str})', f'({new_date_str})'),
    ]
    tracked_modify_para_multi(para, replacements, rev, author, date_str)


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Parse the command line, apply the three tracked edits and save the result."""
    parser = argparse.ArgumentParser(
        description='Add tracked-change metadata updates to a TS DOCX after CR application.'
    )
    parser.add_argument('ts_docx', help='TS DOCX file to update')
    parser.add_argument('cr_docx', help='CR DOCX file to read metadata from')
    parser.add_argument('--author', default=AUTHOR,
                        help='Tracked change author name')
    parser.add_argument('--output', default=None,
                        help='Output path (default: _finalized.docx)')
    args = parser.parse_args()

    ts_path = to_wsl_path(args.ts_docx)
    cr_path = to_wsl_path(args.cr_docx)

    # Determine output path: explicit --output, else next to the TS file.
    if args.output:
        out_path = to_wsl_path(args.output)
    else:
        p = Path(ts_path)
        out_path = str(p.parent / (p.stem + '_finalized.docx'))

    print(f'TS: {ts_path}')
    print(f'CR: {cr_path}')
    print(f'Output: {out_path}')
    print()

    # Open the TS document (the CR is opened inside extract_cr_metadata).
    ts_doc = docx.Document(ts_path)

    # Extract metadata
    print('Extracting CR metadata...')
    meta = extract_cr_metadata(cr_path)
    print(f" Meeting ID: {meta['meeting_id']}")
    print(f" UID: {meta['uid']}")
    print(f" CR#: {meta['cr_num']}")
    print(f" Rev: {meta['rev']}")
    print(f" Category: {meta['cat']}")
    print(f" Title: {meta['title']}")
    print(f" Current version: {meta['current_version']}")
    print()

    # Compute derived values
    pub_ym, pub_month_year = compute_pub_date()
    old_v = meta['current_version']
    try:
        new_v = derive_new_version(old_v)
    except ValueError as err:
        # Without a usable current version nothing meaningful can be written.
        sys.exit(f'ERROR: {err}')
    print(f'Old version: {old_v} → New version: {new_v}')
    print(f'Publication: {pub_month_year} ({pub_ym})')
    print()

    # Extract old date from first paragraph
    title_text = ts_doc.paragraphs[0].text
    date_match = re.search(r'\((\d{4}-\d{2})\)', title_text)
    if not date_match:
        print('WARNING: Could not find date pattern (YYYY-MM) in first paragraph:')
        print(f' {title_text!r}')
        old_date_str = ''
    else:
        old_date_str = date_match.group(1)
    print(f'Title paragraph: {title_text!r}')
    print(f'Old date: {old_date_str} → New date: {pub_ym}')
    print()

    # Set up revision counter and tracked change date
    rev = RevCounter(ts_doc)
    tc_date = DATE  # ISO 8601 from docx_helpers

    # Apply changes
    print('Inserting row in Change History table (Annex V)...')
    ch_cells = update_change_history_table(
        ts_doc, meta, pub_ym, old_v, new_v, rev, args.author, tc_date
    )
    print(f' Row: {ch_cells}')

    print('Inserting row in History table (last page)...')
    h_cells = update_history_table(
        ts_doc, new_v, pub_month_year, rev, args.author, tc_date
    )
    print(f' Row: {h_cells}')

    if old_date_str:
        print('Updating title paragraph...')
        update_title_para(
            ts_doc, old_v, new_v, old_date_str, pub_ym, rev, args.author, tc_date
        )
        print(f' V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')
    else:
        print('Skipping title paragraph update (no date found).')

    # Save
    ts_doc.save(out_path)
    print()
    print(f'Saved: {out_path}')
    print()
    print('Summary of tracked changes:')
    print(f' [Change History] New row: {ch_cells}')
    print(f' [History] New row: {h_cells}')
    if old_date_str:
        print(f' [Title] V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')


if __name__ == '__main__':
    main()