syntax-model / inference /metrics_visualization.py

add files for inference

e4194f4 2 months ago

13.1 kB

	"""
	Визуализация предсказаний SYNTAX:
	- точки (SYNTAX GT vs предсказания модели) для нескольких датасетов;
	- зоны риска (низкий / высокий риск);
	- области ±σ и ±2σ вокруг диагонали;
	- логистические тренды для каждого датасета.

	Скрипт не зависит от PyTorch/Lightning и используется на этапе инференса.
	Сохранение осуществляется в папку `visualizations/` внутри проекта.
	"""

	import os
	import numpy as np
	import plotly.graph_objects as go
	from scipy.optimize import curve_fit # type: ignore


	def visualize_final_syntax_plotly_multi(
	datasets,
	r2_values,
	gt_row,
	postfix=None,
	threshold=22.0,
	recall_values=None,
	backbone=False,
	):
	"""
	Единая визуализация SYNTAX: точки, зоны риска и логистические тренды.

	Параметры
	---------
	datasets : dict[str, tuple[list[float], list[float]]]
	Словарь {имя_датасета: (syntax_true_list, syntax_pred_list)}.
	r2_values : dict[str, float]
	Словарь R^2 по датасетам.
	gt_row : str
	Строка, попадающая в заголовок (например, "ENSEMBLE" или "BOTH").
	postfix : str \| None
	Суффикс для имени сохраняемого файла.
	threshold : float
	Порог SYNTAX (обычно 22.0) для разделения зон риска.
	recall_values : dict[str, float] \| None
	Словарь Recall по датасетам (может быть None).
	backbone : bool
	Если True, сохраняет в `visualizations/backbone`, иначе в `visualizations/`.
	"""
	# ========== КОНСТАНТЫ ДЛЯ НАСТРОЙКИ ==========
	DATA_MIN = 0.0
	DATA_MAX = 60.0

	PADDING = 0.5

	SIGMA_SLOPE = 0.15
	SIGMA_BASE = 1.4

	PLOT_WIDTH = 980
	PLOT_HEIGHT = 980

	BASE_FONT_SIZE = 16
	TITLE_FONT_SIZE = 22
	AXIS_LABEL_FONT_SIZE = BASE_FONT_SIZE
	AXIS_TICK_FONT_SIZE = 15
	LEGEND_FONT_SIZE = 14

	MARKER_SIZE = 11
	MARKER_LINE_WIDTH = 1.1
	LINE_WIDTH = 2
	TREND_LINE_WIDTH = 3

	PLOT_BG_COLOR = "rgba(235,238,245,1)"
	PAPER_BG_COLOR = "white"
	LEGEND_BG_COLOR = "rgba(255,255,255,0.94)"
	GRID_COLOR = "rgba(100,116,139,0.18)"

	MARGIN_LEFT = 70
	MARGIN_RIGHT = 24
	MARGIN_TOP = 78
	MARGIN_BOTTOM = 70

	LEGEND_X = 0.04
	LEGEND_Y = 0.99

	COLORS = ["#1E88E5", "#8E24AA", "#A0D137", "#EA1D1D", "#06EE0D", "#FB8C00"]
	SYMBOLS = ["circle", "x", "square", "diamond", "triangle-up", "star"]

	SIGMA_POINTS = 400
	TREND_POINTS = 500

	# ========== ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ ==========

	def _logistic_time(t, R0, Rmax, t50, k):
	"""Логистическая функция по времени/оценке SYNTAX."""
	t = np.asarray(t, dtype=float)
	t_safe = np.where(t <= 0, 1e-3, t)
	return R0 + (Rmax - R0) / (1.0 + (t50 / t_safe) ** k)

	def _fit_logistic(x, y, domain=(DATA_MIN, DATA_MAX), n=TREND_POINTS):
	"""
	Аппроксимация логистической кривой.
	Возвращает X, Y или (None, None), если фит не удался.
	"""
	x = np.asarray(x, dtype=float)
	y = np.asarray(y, dtype=float)
	m = np.isfinite(x) & np.isfinite(y)
	if m.sum() < 4:
	return None, None

	x_m, y_m = x[m], y[m]
	x_min = max(float(np.min(x_m)), float(domain[0]))
	x_max = min(float(np.max(x_m)), float(domain[1]))
	if not np.isfinite(x_min) or not np.isfinite(x_max) or x_max <= x_min:
	return None, None

	x_pos = x_m[x_m > 0]
	if x_pos.size == 0:
	return None, None

	R0_init = float(np.percentile(y_m, 10))
	Rmax_init = float(np.percentile(y_m, 90))
	t50_init = float(np.median(x_pos))
	k_init = 1.0

	lower = [-10.0, 0.0, 1e-3, 0.01]
	upper = [60.0, 80.0, 60.0, 10.0]

	try:
	popt, _ = curve_fit(
	_logistic_time,
	x_m,
	y_m,
	p0=[R0_init, Rmax_init, t50_init, k_init],
	bounds=(lower, upper),
	maxfev=20000,
	)
	except Exception:
	return None, None

	X = np.linspace(x_min, x_max, n)
	Y = _logistic_time(X, *popt)
	return X, Y

	# ========== ОСНОВНОЙ КОД ==========
	fig = go.Figure()

	line_min = DATA_MIN - PADDING
	line_max = DATA_MAX + PADDING
	domain = (line_min, line_max)

	base_font = dict(
	family="Inter, Roboto, Helvetica Neue, Arial, sans-serif",
	size=BASE_FONT_SIZE,
	)

	# ---------- Пороги и линии (legendrank=0) ----------
	fig.add_trace(
	go.Scatter(
	x=[line_min, threshold, threshold, line_min],
	y=[line_min, line_min, threshold, threshold],
	fill="toself",
	fillcolor="rgba(255, 82, 82, 0.12)",
	line=dict(color="rgba(0,0,0,0)"),
	name="Low-risk zone",
	legendgroup="zones",
	legendgrouptitle_text="Пороги и линии",
	showlegend=True,
	hoverinfo="skip",
	legendrank=0,
	)
	)
	fig.add_trace(
	go.Scatter(
	x=[threshold, line_max, line_max, threshold],
	y=[threshold, threshold, line_max, line_max],
	fill="toself",
	fillcolor="rgba(76, 175, 80, 0.14)",
	line=dict(color="rgba(0,0,0,0)"),
	name="High-risk zone",
	legendgroup="zones",
	showlegend=True,
	hoverinfo="skip",
	legendrank=0,
	)
	)

	fig.add_trace(
	go.Scatter(
	x=[threshold, threshold, None, line_min, line_max],
	y=[line_min, line_max, None, threshold, threshold],
	mode="lines",
	name=rf"$\mathrm{{SYNTAX}}={threshold}$",
	legendgroup="zones",
	showlegend=True,
	line=dict(color="rgba(46,125,50,0.85)", width=LINE_WIDTH, dash="dash"),
	legendrank=0,
	hoverinfo="skip",
	)
	)

	x_vals = np.linspace(line_min, line_max, SIGMA_POINTS)
	sigma_upper = x_vals + SIGMA_BASE + SIGMA_SLOPE * x_vals
	sigma_lower = x_vals - SIGMA_BASE - SIGMA_SLOPE * x_vals
	two_sigma_upper = x_vals + 2 * SIGMA_BASE + 2 * SIGMA_SLOPE * x_vals
	two_sigma_lower = x_vals - 2 * SIGMA_BASE - 2 * SIGMA_SLOPE * x_vals

	fig.add_trace(
	go.Scatter(
	x=np.concatenate([x_vals, x_vals[::-1]]),
	y=np.concatenate([two_sigma_lower, two_sigma_upper[::-1]]),
	fill="toself",
	fillcolor="rgba(255,193,7,0.18)",
	line=dict(color="rgba(0,0,0,0)"),
	name=r"$\pm 2\sigma$",
	legendgroup="zones",
	showlegend=True,
	hoverinfo="skip",
	legendrank=0,
	)
	)
	fig.add_trace(
	go.Scatter(
	x=np.concatenate([x_vals, x_vals[::-1]]),
	y=np.concatenate([sigma_lower, sigma_upper[::-1]]),
	fill="toself",
	fillcolor="rgba(255,152,0,0.30)",
	line=dict(color="rgba(0,0,0,0)"),
	name=r"$\pm \sigma$",
	legendgroup="zones",
	showlegend=True,
	hoverinfo="skip",
	legendrank=0,
	)
	)

	fig.add_trace(
	go.Scatter(
	x=[line_min, line_max],
	y=[line_min, line_max],
	mode="lines",
	name=r"$y=x$",
	legendgroup="zones",
	showlegend=True,
	line=dict(color="rgba(30,30,30,0.85)", width=LINE_WIDTH),
	legendrank=0,
	)
	)

	# ---------- Датасеты (legendrank=20) ----------
	first_dataset = True
	for i, (label, (syntax_true, syntax_pred)) in enumerate(datasets.items()):
	x = np.array(syntax_true, dtype=float)
	y = np.array(syntax_pred, dtype=float)
	if x.size == 0 or y.size == 0:
	continue

	r2 = r2_values.get(label, None)
	recall = recall_values.get(label, None) if recall_values else None
	hover_lines = [f"<b>{label}</b>"]
	if r2 is not None:
	hover_lines.append(f"R² = {r2:.3f}")
	if recall is not None:
	hover_lines.append(f"Recall = {recall:.3f}")
	hovertemplate = (
	"<br>".join(hover_lines)
	+ "<br>GT: %{x:.3f}<br>Pred: %{y:.3f}<extra></extra>"
	)

	fig.add_trace(
	go.Scatter(
	x=x,
	y=y,
	mode="markers",
	name=label,
	legendgroup="datasets",
	legendgrouptitle_text=("Датасеты" if first_dataset else None),
	showlegend=True,
	marker=dict(
	color=COLORS[i % len(COLORS)],
	size=MARKER_SIZE,
	opacity=0.96,
	symbol=SYMBOLS[i % len(SYMBOLS)],
	line=dict(
	width=MARKER_LINE_WIDTH, color="rgba(255,255,255,0.95)"
	),
	),
	hovertemplate=hovertemplate,
	legendrank=20,
	)
	)
	first_dataset = False

	# ---------- Тренды: логистические (legendrank=30) ----------
	first_trend = True
	for i, (label, (syntax_true, syntax_pred)) in enumerate(datasets.items()):
	x = np.array(syntax_true, dtype=float)
	y = np.array(syntax_pred, dtype=float)
	if x.size == 0 or y.size == 0:
	continue

	Xc, Yc = _fit_logistic(x, y, domain=domain)
	if Xc is not None:
	fig.add_trace(
	go.Scatter(
	x=Xc,
	y=Yc,
	mode="lines",
	name=label, # без коротких alias, полное имя датасета
	legendgroup="trends",
	legendgrouptitle_text=(
	"Тренды (логистические)" if first_trend else None
	),
	showlegend=True,
	line=dict(
	color=COLORS[i % len(COLORS)], width=TREND_LINE_WIDTH
	),
	hoverinfo="skip",
	legendrank=30,
	)
	)
	first_trend = False

	# ---------- оформление ----------
	title_text = f"SYNTAX predictions ({gt_row})"
	if postfix:
	title_text += f" {postfix}"

	fig.update_layout(
	title=dict(
	text=title_text,
	x=0.5,
	xanchor="center",
	font=dict(
	size=TITLE_FONT_SIZE,
	family=base_font["family"],
	color="rgba(15,23,42,1)",
	),
	),
	font=base_font,
	xaxis_title=r"$\mathrm{SYNTAX\ GT}$",
	yaxis_title=r"$\mathrm{SYNTAX\ predictions}$",
	width=PLOT_WIDTH,
	height=PLOT_HEIGHT,
	plot_bgcolor=PLOT_BG_COLOR,
	paper_bgcolor=PAPER_BG_COLOR,
	legend=dict(
	x=LEGEND_X,
	y=LEGEND_Y,
	bgcolor=LEGEND_BG_COLOR,
	bordercolor="#CBD5E1",
	borderwidth=1,
	font=dict(size=LEGEND_FONT_SIZE, family=base_font["family"]),
	tracegroupgap=8,
	itemclick="toggle",
	itemdoubleclick="toggleothers",
	groupclick="toggleitem",
	),
	xaxis=dict(
	showgrid=True,
	gridcolor=GRID_COLOR,
	gridwidth=1,
	zeroline=False,
	tickfont=dict(size=AXIS_TICK_FONT_SIZE),
	range=[line_min, line_max],
	constrain="domain",
	),
	yaxis=dict(
	showgrid=True,
	gridcolor=GRID_COLOR,
	gridwidth=1,
	zeroline=False,
	tickfont=dict(size=AXIS_TICK_FONT_SIZE),
	range=[line_min, line_max],
	scaleanchor="x",
	scaleratio=1,
	constrain="domain",
	),
	margin=dict(
	l=MARGIN_LEFT,
	r=MARGIN_RIGHT,
	t=MARGIN_TOP,
	b=MARGIN_BOTTOM,
	),
	)

	# ---------- сохранение ----------
	save_dir = "visualizations"
	if backbone:
	save_dir = os.path.join(save_dir, "backbone")
	os.makedirs(save_dir, exist_ok=True)

	postfix_html = f"{postfix}" if postfix else "syntax"
	save_path_html = os.path.join(save_dir, f"{postfix_html}.html")
	fig.write_html(save_path_html, include_mathjax="cdn")
	print(f"Saved visualization with logistic trends: {save_path_html}")