Spaces:

OrganizedProgrammers
/

ApplyCRs

Sleeping

App Files Files Community

ApplyCRs / scripts /finalize_ts.py

heymenn

init

7eedaf8 4 days ago

raw

history blame contribute delete

13.5 kB

	#!/usr/bin/env python3
	"""
	finalize_ts.py — Add tracked-change metadata updates to a TS DOCX after CR application.

	Three edits are made (all as tracked changes):
	1. New row in the Change History table (second-to-last table, Annex V)
	2. New row in the History table (last table, last page)
	3. Version + date update in the first paragraph (title)

	Usage:
	python3 finalize_ts.py <ts_docx> <cr_docx> [--author "Name"] [--output <path>]
	"""

	import argparse
	import re
	import sys
	from datetime import date, timedelta
	from pathlib import Path

	import docx

	sys.path.insert(0, str(Path(__file__).parent))
	from docx_helpers import (
	RevCounter,
	tracked_insert_table_row,
	tracked_modify_para_multi,
	AUTHOR,
	DATE,
	)


	# ── Path helpers ──────────────────────────────────────────────────────────────

	def to_wsl_path(p: str) -> str:
	    """Convert a Windows drive path to the matching WSL mount path.

	    A string that starts with a drive letter, a colon and a backslash or
	    forward slash (``C:\\...`` or ``C:/...``) becomes ``/mnt/<drive>/...``:
	    the drive letter is lower-cased and every backslash is turned into a
	    forward slash. Any other string is returned unchanged.

	    Args:
	        p: Path string as given on the command line.

	    Returns:
	        The converted path, or ``p`` itself when it is not a drive path.
	    """
	    # Any drive letter qualifies, not only C: and D:.
	    if re.match(r'[A-Za-z]:[\\/]', p):
	        drive = p[0].lower()
	        # p[2:] keeps the separator that follows the colon.
	        rest = p[2:].replace('\\', '/')
	        return f'/mnt/{drive}{rest}'
	    return p


	# ── Date / version helpers ────────────────────────────────────────────────────

	def compute_pub_date(today=None, threshold_days=5):
	    """
	    Return (yyyy-mm, "Month YYYY") for the publication month.

	    The 5-day rule: if the first day of next month is at most
	    ``threshold_days`` days after ``today``, the next month is used;
	    otherwise the current month is used.

	    Args:
	        today: ``datetime.date`` the rule is evaluated for. Defaults to
	            ``date.today()``; pass an explicit date for reproducible runs.
	        threshold_days: Size of the look-ahead window in days (default 5).

	    Returns:
	        Tuple of two strings produced by ``strftime('%Y-%m')`` and
	        ``strftime('%B %Y')`` on the first day of the chosen month.
	    """
	    if today is None:
	        today = date.today()
	    month_start = today.replace(day=1)
	    # 32 days past the 1st always lands in the following month, whatever
	    # the month length; snapping back to day 1 gives next month's start.
	    first_next = (month_start + timedelta(days=32)).replace(day=1)
	    days_until = (first_next - today).days
	    target = first_next if days_until <= threshold_days else month_start
	    return target.strftime('%Y-%m'), target.strftime('%B %Y')


	def derive_new_version(v: str) -> str:
	    """Increment the middle component of X.Y.Z, giving X.(Y+1).0.

	    Only the second component is parsed as an integer; the first component
	    and anything after the third are carried over untouched.

	    Args:
	        v: Dot-separated version string such as ``'18.2.0'``.

	    Returns:
	        The version with the second component incremented and the third
	        reset to ``'0'``.

	    Raises:
	        ValueError: If ``v`` has fewer than three dot-separated components
	            or its second component is not an integer.
	    """
	    parts = v.split('.')
	    # An empty or truncated version would otherwise surface as a bare
	    # IndexError; report what was actually received instead.
	    if len(parts) < 3:
	        raise ValueError(
	            f'Cannot derive new version from {v!r}: expected the form X.Y.Z'
	        )
	    try:
	        minor = int(parts[1])
	    except ValueError:
	        raise ValueError(
	            f'Cannot derive new version from {v!r}: '
	            f'middle component {parts[1]!r} is not an integer'
	        ) from None
	    parts[1] = str(minor + 1)
	    parts[2] = '0'
	    return '.'.join(parts)


	# ── CR metadata extraction ────────────────────────────────────────────────────

	def _parse_meeting_id(meeting_text):
	    """Reduce a meeting cell to 'BODY-NUMBER', or return the text unchanged."""
	    # Body: word before "Meeting" (e.g. "SET")
	    body_match = re.search(r'(\w+)\s+Meeting\s*#', meeting_text)
	    # Number: digits after "#". Whitespace on either side of '#' is
	    # optional so that both "Meeting #121" and "Meeting#121" are parsed.
	    num_match = re.search(r'Meeting\s*#\s*(\d+)', meeting_text)
	    if body_match and num_match:
	        return f'{body_match.group(1)}-{num_match.group(1)}'
	    return meeting_text


	def _scan_labelled_cells(cells, meta):
	    """Fill title, cr_num, rev, cat and current_version in meta from label cells."""
	    # A cell whose text starts with one of these labels names the field;
	    # the first label that matches (in this order) wins for that cell.
	    label_map = {
	        'CR': 'cr_num',
	        'Rev': 'rev',
	        'Curr. vers': 'current_version',
	        'Current version': 'current_version',
	        'Cat': 'cat',
	        'Category': 'cat',
	    }
	    title_next = False
	    for i, c in enumerate(cells):
	        stripped = c.strip().rstrip(':')

	        # Title may span its own cell or be labelled
	        if stripped.lower() in ('title', 'title of change'):
	            title_next = True
	            continue
	        if title_next:
	            # Keep waiting across empty cells until some text shows up.
	            if c.strip():
	                meta['title'] = c.strip()
	                title_next = False
	            continue

	        for label, key in label_map.items():
	            if stripped.startswith(label):
	                # Value is the first non-empty cell among the next three.
	                # A later matching label cell overwrites an earlier value.
	                for val in cells[i + 1:i + 4]:
	                    val = val.strip()
	                    if val:
	                        meta[key] = val
	                        break
	                break


	def extract_cr_metadata(cr_docx_path: str) -> dict:
	    """
	    Open the CR DOCX and read metadata from tables[0] (cover page table).

	    Args:
	        cr_docx_path: Path of the CR document, passed to ``docx.Document``.

	    Returns:
	        dict with keys
	        meeting_id, uid, cr_num, rev, cat, title, current_version.
	        Every value is a string; a field that is not found stays ''.
	        meeting_id is 'BODY-NUMBER' when both parts can be parsed from the
	        meeting cell, otherwise the full text of that cell.

	    Raises:
	        ValueError: If the document contains no tables.
	    """
	    doc = docx.Document(cr_docx_path)
	    if not doc.tables:
	        raise ValueError('CR has no tables — cannot extract metadata')

	    # Flatten the cover table into a list of stripped cell texts, row by row.
	    cells = [
	        cell.text.strip()
	        for row in doc.tables[0].rows
	        for cell in row.cells
	    ]

	    meta = {
	        'meeting_id': '',
	        'uid': '',
	        'cr_num': '',
	        'rev': '',
	        'cat': '',
	        'title': '',
	        'current_version': '',
	    }

	    # --- Meeting ID ---
	    # First cell containing "Meeting #", e.g. "ETSI TC SET Meeting #121, Edinburgh..."
	    meeting_text = next(
	        (c for c in cells if 'Meeting #' in c or 'Meeting#' in c), ''
	    )
	    if meeting_text:
	        meta['meeting_id'] = _parse_meeting_id(meeting_text)

	    # --- UID ---
	    # Pattern like SET(26)000019r1 or similar; the first cell that matches wins.
	    uid_pat = re.compile(r'[A-Z]+\(\d+\)\d+\S*')
	    for c in cells:
	        m = uid_pat.search(c)
	        if m:
	            meta['uid'] = m.group(0)
	            break

	    # --- Label-value scanning ---
	    _scan_labelled_cells(cells, meta)

	    return meta


	# ── Meeting ID format detection ───────────────────────────────────────────────

	def _detect_meeting_separator(tbl):
	    """
	    Work out which character separates body letters from the meeting number.

	    Rows of the Change History table are visited from the bottom up, looking
	    only at the meeting column (col index 1). The first cell whose text has
	    a letter, one non-alphanumeric character and a digit in sequence decides
	    the answer, e.g. '#' for 'SET#115' or '-' for 'SET-119'. Rows with fewer
	    than two cells, and cells without such a sequence, are passed over.

	    Returns the separator character, or '#' when no row provides one.
	    """
	    sep_pattern = re.compile(r'[A-Za-z]([^A-Za-z0-9])\d')
	    for row in reversed(tbl.rows):
	        row_cells = row.cells
	        if len(row_cells) <= 1:
	            continue
	        # An empty cell cannot match, so no separate emptiness test is needed.
	        found = sep_pattern.search(row_cells[1].text.strip())
	        if found is not None:
	            return found.group(1)
	    return '#'


	# ── TS table locators ─────────────────────────────────────────────────────────

	def find_change_history_table(ts_doc):
	    """
	    Locate the Change History table (Annex V) as ts_doc.tables[-2].

	    The column count is taken from the number of cells in the table's last
	    row and must be 8 or 9.

	    Raises ValueError when the document has fewer than two tables or when
	    the column count is anything else.
	    """
	    all_tables = ts_doc.tables
	    if len(all_tables) < 2:
	        raise ValueError('TS has fewer than 2 tables')

	    candidate = all_tables[-2]
	    width = len(candidate.rows[-1].cells)
	    if width == 8 or width == 9:
	        return candidate
	    raise ValueError(
	        f'Change History table has {width} columns, expected 8 or 9'
	    )


	def find_history_table(ts_doc):
	    """
	    Return ts_doc.tables[-1] (History / last page). Validates 3 columns.

	    The column count is the number of cells in the table's last row.

	    Raises:
	        ValueError: If the document has no tables, or if the last row of
	            the last table does not have exactly 3 cells.
	    """
	    tables = ts_doc.tables
	    # Fail with ValueError like find_change_history_table does, instead of
	    # letting tables[-1] raise a bare IndexError on a table-less document.
	    if not tables:
	        raise ValueError('TS has no tables')
	    tbl = tables[-1]
	    ncols = len(tbl.rows[-1].cells)
	    if ncols != 3:
	        raise ValueError(
	            f'History table has {ncols} columns, expected 3'
	        )
	    return tbl


	# ── Update functions ──────────────────────────────────────────────────────────

	def update_change_history_table(ts_doc, meta, pub_yyyy_mm, old_v, new_v, rev, author, date_str):
	    """
	    Append a tracked row describing the CR to the Change History table.

	    Args:
	        ts_doc: TS document; the table is located with find_change_history_table.
	        meta: Dict with keys meeting_id, uid, cr_num, rev, cat, title.
	        pub_yyyy_mm: Publication month text for the date column.
	        old_v: Version before the CR (used only by the 9-column layout).
	        new_v: Version after the CR.
	        rev, author, date_str: Passed through unchanged to
	            tracked_insert_table_row.

	    Returns:
	        The list of cell texts that was inserted.
	    """
	    tbl = find_change_history_table(ts_doc)
	    ncols = len(tbl.rows[-1].cells)

	    # Detect separator used in existing rows (e.g. '#' in 'SET#115', '-' in 'SET-119')
	    # and reformat meeting_id accordingly so it matches the existing style.
	    sep = _detect_meeting_separator(tbl)
	    meeting_id = meta['meeting_id']
	    if sep != '-':
	        # Rewrite only a strict BODY-NUMBER value. extract_cr_metadata can
	        # fall back to the raw meeting cell text, and a hyphen inside such
	        # free text must not be treated as the body/number separator.
	        strict = re.fullmatch(r'(\w+)-(\w+)', meeting_id)
	        if strict:
	            meeting_id = f'{strict.group(1)}{sep}{strict.group(2)}'

	    # Fields every layout lists in the same order: cr | rev | cat | title
	    change = [meta['cr_num'], meta['rev'], meta['cat'], meta['title']]

	    if ncols == 8:
	        # Detect 8-column variant by first column header
	        first_header = tbl.rows[0].cells[0].text.strip()
	        if re.search(r'[Dd]ate', first_header):
	            # date | meeting | uid | cr | rev | cat | title | new_v (no old_v)
	            cell_texts = [pub_yyyy_mm, meeting_id, meta['uid'], *change, new_v]
	        else:
	            # meeting | uid | wg_doc | cr | rev | cat | title | new_v (no date, no old_v)
	            cell_texts = [meeting_id, meta['uid'], '', *change, new_v]
	    else:
	        # Standard ETSI format: date | meeting | uid | cr | rev | cat | title | old_v | new_v
	        # With 9 columns the slice keeps the whole list; any other count
	        # is rejected by find_change_history_table in this file, so the
	        # truncation is only a defensive fallback.
	        cell_texts = [
	            pub_yyyy_mm, meeting_id, meta['uid'], *change, old_v, new_v,
	        ][:ncols]

	    tracked_insert_table_row(tbl, cell_texts, rev, author, date_str)
	    return cell_texts


	def update_history_table(ts_doc, new_v, pub_month_year, rev, author, date_str):
	    """
	    Append a tracked 'Publication' row to the History table.

	    The row holds the new version prefixed with 'V', the publication month
	    text and the literal 'Publication'. rev, author and date_str go straight
	    to tracked_insert_table_row. The inserted cell texts are returned.
	    """
	    history_tbl = find_history_table(ts_doc)
	    new_row = ['V{}'.format(new_v), pub_month_year, 'Publication']
	    tracked_insert_table_row(history_tbl, new_row, rev, author, date_str)
	    return new_row


	def update_title_para(ts_doc, old_v, new_v, old_date_str, new_date_str, rev, author, date_str):
	    """
	    Rewrite version and date in the document's first paragraph.

	    Two replacement pairs are built, 'V<old_v>' -> 'V<new_v>' and
	    '(<old_date_str>)' -> '(<new_date_str>)', and handed together to
	    tracked_modify_para_multi in a single call, along with rev, author and
	    date_str. Nothing is returned.
	    """
	    title_para = ts_doc.paragraphs[0]
	    version_swap = ('V{}'.format(old_v), 'V{}'.format(new_v))
	    date_swap = ('({})'.format(old_date_str), '({})'.format(new_date_str))
	    tracked_modify_para_multi(
	        title_para, [version_swap, date_swap], rev, author, date_str
	    )


	# ── Main ──────────────────────────────────────────────────────────────────────

	def main():
	    """
	    Command-line entry point.

	    Parses the arguments, reads the CR metadata, derives the new version
	    and publication date, applies the three tracked edits to the TS
	    (Change History row, History row, title paragraph) and saves the result
	    to the output path. Progress and a summary are printed to stdout.

	    Exits with an error message when no new version can be derived from
	    the CR's current version.
	    """
	    parser = argparse.ArgumentParser(
	        description='Add tracked-change metadata updates to a TS DOCX after CR application.'
	    )
	    parser.add_argument('ts_docx', help='TS DOCX file to update')
	    parser.add_argument('cr_docx', help='CR DOCX file to read metadata from')
	    parser.add_argument('--author', default=AUTHOR, help='Tracked change author name')
	    parser.add_argument('--output', default=None, help='Output path (default: <ts>_finalized.docx)')
	    args = parser.parse_args()

	    ts_path = to_wsl_path(args.ts_docx)
	    cr_path = to_wsl_path(args.cr_docx)

	    # Determine output path
	    if args.output:
	        out_path = to_wsl_path(args.output)
	    else:
	        p = Path(ts_path)
	        out_path = str(p.parent / (p.stem + '_finalized.docx'))

	    print(f'TS: {ts_path}')
	    print(f'CR: {cr_path}')
	    print(f'Output: {out_path}')
	    print()

	    # Open the TS. The CR is opened inside extract_cr_metadata, so it is
	    # not parsed a second time here.
	    ts_doc = docx.Document(ts_path)

	    # Extract metadata
	    print('Extracting CR metadata...')
	    meta = extract_cr_metadata(cr_path)
	    print(f" Meeting ID: {meta['meeting_id']}")
	    print(f" UID: {meta['uid']}")
	    print(f" CR#: {meta['cr_num']}")
	    print(f" Rev: {meta['rev']}")
	    print(f" Category: {meta['cat']}")
	    print(f" Title: {meta['title']}")
	    print(f" Current version: {meta['current_version']}")
	    print()

	    # Compute derived values
	    pub_ym, pub_month_year = compute_pub_date()
	    old_v = meta['current_version']
	    try:
	        new_v = derive_new_version(old_v)
	    except (ValueError, IndexError):
	        # current_version stays '' when the cover table has no matching
	        # label; stop with a readable message instead of a traceback.
	        sys.exit(
	            f'ERROR: cannot derive a new version from CR current version {old_v!r} '
	            '(expected the form X.Y.Z)'
	        )
	    print(f'Old version: {old_v} → New version: {new_v}')
	    print(f'Publication: {pub_month_year} ({pub_ym})')
	    print()

	    # Extract old date from first paragraph
	    title_text = ts_doc.paragraphs[0].text
	    date_match = re.search(r'\((\d{4}-\d{2})\)', title_text)
	    if not date_match:
	        print('WARNING: Could not find date pattern (YYYY-MM) in first paragraph:')
	        print(f' {title_text!r}')
	        old_date_str = ''
	    else:
	        old_date_str = date_match.group(1)
	        print(f'Title paragraph: {title_text!r}')
	        print(f'Old date: {old_date_str} → New date: {pub_ym}')
	    print()

	    # Set up revision counter and tracked change date
	    rev = RevCounter(ts_doc)
	    tc_date = DATE  # ISO 8601 from docx_helpers

	    # Apply changes
	    print('Inserting row in Change History table (Annex V)...')
	    ch_cells = update_change_history_table(ts_doc, meta, pub_ym, old_v, new_v, rev, args.author, tc_date)
	    print(f' Row: {ch_cells}')

	    print('Inserting row in History table (last page)...')
	    h_cells = update_history_table(ts_doc, new_v, pub_month_year, rev, args.author, tc_date)
	    print(f' Row: {h_cells}')

	    # The title edit (version and date together) is applied only when a
	    # date was found in the first paragraph.
	    if old_date_str:
	        print('Updating title paragraph...')
	        update_title_para(ts_doc, old_v, new_v, old_date_str, pub_ym, rev, args.author, tc_date)
	        print(f' V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')
	    else:
	        print('Skipping title paragraph update (no date found).')

	    # Save
	    ts_doc.save(out_path)
	    print()
	    print(f'Saved: {out_path}')
	    print()
	    print('Summary of tracked changes:')
	    print(f' [Change History] New row: {ch_cells}')
	    print(f' [History] New row: {h_cells}')
	    if old_date_str:
	        print(f' [Title] V{old_v} → V{new_v}, ({old_date_str}) → ({pub_ym})')


	# Run the command-line entry point only when this file is executed as a
	# script; importing the module does not call main().
	if __name__ == '__main__':
	    main()