Upload 108 files

a96891a verified 5 months ago

37.9 kB

	import os
	import time
	import random

	import numpy as np

	import shutil
	from enum import Enum

	import torch
	import torchvision.transforms as transforms
	# from t_cube import get_logits


	def set_random_seed(seed):
	    """Seed the random number generators used by this module.

	    Calls, in order, ``random.seed``, ``np.random.seed``,
	    ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``.

	    Args:
	        seed: Value passed unchanged to each seeding function.
	    """
	    seeders = (
	        random.seed,
	        np.random.seed,
	        torch.manual_seed,
	        torch.cuda.manual_seed_all,
	    )
	    for seed_fn in seeders:
	        seed_fn(seed)

	class Summary(Enum):
	    """Kinds of end-of-run summary a meter can be asked to report."""

	    NONE = 0     # no summary text
	    AVERAGE = 1  # summarize with the running average
	    SUM = 2      # summarize with the accumulated sum
	    COUNT = 3    # summarize with the accumulated count

	class AverageMeter(object):
	    """Keep the latest value, weighted sum, count and mean of a metric."""

	    def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
	        # name: label used when printing
	        # fmt: format spec (including the leading ':') applied to val/avg
	        # summary_type: Summary member selecting what summary() reports
	        self.name = name
	        self.fmt = fmt
	        self.summary_type = summary_type
	        self.reset()

	    def reset(self):
	        """Set every running statistic back to zero."""
	        self.val = self.avg = self.sum = self.count = 0

	    def update(self, val, n=1):
	        """Store ``val`` as the newest value, counted ``n`` times."""
	        self.val = val
	        self.sum += val * n
	        self.count += n
	        self.avg = self.sum / self.count

	    def __str__(self):
	        # Builds e.g. '{name} {val:f} ({avg:f})' and fills it from the
	        # instance attributes.
	        template = ''.join(
	            ['{name} {val', self.fmt, '} ({avg', self.fmt, '})']
	        )
	        return template.format(**self.__dict__)

	    def summary(self):
	        """Return the summary text selected by ``self.summary_type``.

	        Raises:
	            ValueError: If ``summary_type`` is not a ``Summary`` member.
	        """
	        templates = (
	            (Summary.NONE, ''),
	            (Summary.AVERAGE, '{name} {avg:.3f}'),
	            (Summary.SUM, '{name} {sum:.3f}'),
	            (Summary.COUNT, '{name} {count:.3f}'),
	        )
	        for kind, template in templates:
	            if self.summary_type is kind:
	                return template.format(**self.__dict__)
	        raise ValueError('invalid summary type %r' % self.summary_type)


	class ProgressMeter(object):
	    """Print a group of meters next to a ``[batch/total]`` counter."""

	    def __init__(self, num_batches, meters, prefix=""):
	        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
	        self.meters = meters
	        self.prefix = prefix

	    def display(self, batch):
	        """Print the prefix, the batch counter and every meter, tab-separated."""
	        header = self.prefix + self.batch_fmtstr.format(batch)
	        print('\t'.join([header, *map(str, self.meters)]))

	    def display_summary(self):
	        """Print each meter's ``summary()`` on one line after a `` *`` marker."""
	        parts = [" *"]
	        parts.extend(meter.summary() for meter in self.meters)
	        print(' '.join(parts))

	    def _get_batch_fmtstr(self, num_batches):
	        # The counter is padded to the number of digits in num_batches,
	        # e.g. 100 batches -> '[{:3d}/100]'.
	        width = len(str(num_batches // 1))
	        counter = '{:' + str(width) + 'd}'
	        return '[{}/{}]'.format(counter, counter.format(num_batches))


	def accuracy(output, target, topk=(1,)):
	    """Compute the top-k accuracy, in percent, for each requested ``k``.

	    Args:
	        output: Score tensor; the k best entries are taken along
	            dimension 1 and compared against ``target``.
	        target: Tensor of ground-truth class indices; ``target.size(0)``
	            is used as the batch size.
	        topk: Iterable of ``k`` values to evaluate.

	    Returns:
	        list: One single-element float tensor per entry of ``topk``,
	        holding the percentage of samples whose target is among the
	        ``k`` highest-scoring classes.
	    """
	    with torch.no_grad():
	        # Take as many predictions as the largest requested k, but never
	        # more than there are classes (topk would raise otherwise). For a
	        # k above the class count the slice below simply uses every row.
	        maxk = min(max(topk), output.size(1))
	        batch_size = target.size(0)

	        _, pred = output.topk(maxk, 1, True, True)
	        pred = pred.t()
	        correct = pred.eq(target.view(1, -1).expand_as(pred))

	        res = []
	        for k in topk:
	            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
	            res.append(correct_k.mul_(100.0 / batch_size))
	        return res

	from sklearn.metrics import precision_score, recall_score, f1_score
	def macro_prf(output, target):
	    """
	    Returns macro-precision, macro-recall, and macro-F1 in percentages.

	    Args:
	        output: Score tensor; predictions are the argmax over dimension 1.
	        target: Tensor of ground-truth labels.

	    Returns:
	        list: ``[precision, recall, f1]``, each multiplied by 100.
	        Undefined scores count as 0 (``zero_division=0``).
	    """
	    preds = output.argmax(dim=1).cpu().numpy()
	    y_true = target.cpu().numpy()

	    p = precision_score(y_true, preds, average='macro', zero_division=0)
	    r = recall_score(y_true, preds, average='macro', zero_division=0)
	    f = f1_score(y_true, preds, average='macro', zero_division=0)

	    # Scale every score to a percentage.
	    return [p * 100, r * 100, f * 100]

	def load_model_weight(load_path, model, device, args):
	    """Load a checkpoint file into ``model`` and record its epoch on ``args``.

	    Args:
	        load_path: Path of the checkpoint file. If it is not an existing
	            file, a message is printed and nothing else happens.
	        model: Object exposing ``load_state_dict``; if that call fails,
	            ``model.prompt_generator.load_state_dict(..., strict=False)``
	            is tried instead.
	        device: Passed to ``torch.load`` as ``map_location``.
	        args: Object on which ``start_epoch`` is set from the checkpoint.

	    Raises:
	        KeyError: If the checkpoint lacks ``'state_dict'`` or ``'epoch'``.
	    """
	    if not os.path.isfile(load_path):
	        print("=> no checkpoint found at '{}'".format(load_path))
	        return

	    print("=> loading checkpoint '{}'".format(load_path))
	    checkpoint = torch.load(load_path, map_location=device)
	    state_dict = checkpoint['state_dict']

	    # Ignore fixed token vectors
	    for fixed_key in ("token_prefix", "token_suffix"):
	        if fixed_key in state_dict:
	            del state_dict[fixed_key]

	    args.start_epoch = checkpoint['epoch']
	    # The checkpoint's 'best_acc1' entry is not read here: nothing in this
	    # function consumes it.

	    try:
	        model.load_state_dict(state_dict)
	    except Exception:
	        # TODO: implement this method for the generator class
	        model.prompt_generator.load_state_dict(state_dict, strict=False)

	    print("=> loaded checkpoint '{}' (epoch {})"
	          .format(load_path, checkpoint['epoch']))
	    del checkpoint
	    torch.cuda.empty_cache()


	def validate(val_loader, model, criterion, args, output_mask=None):
	    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 accuracy.

	    Args:
	        val_loader: Iterable yielding ``(images, target)`` batches.
	        model: Callable model; switched to eval mode before the loop.
	        criterion: Loss callable invoked as ``criterion(output, target)``.
	        args: Object providing ``gpu`` and ``print_freq``.
	        output_mask: Optional index (list, array or tensor) selecting
	            output columns via ``output[:, output_mask]``. ``None`` or an
	            empty mask leaves the output untouched.

	    Returns:
	        The running average of the top-1 accuracy meter.
	    """
	    batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
	    losses = AverageMeter('Loss', ':.4e', Summary.NONE)
	    top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
	    top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
	    progress = ProgressMeter(
	        len(val_loader),
	        [batch_time, losses, top1, top5],
	        prefix='Test: ')

	    # The truth value of a multi-element array/tensor is ambiguous, so
	    # test explicitly for "a mask was given and it is not empty".
	    use_mask = output_mask is not None and len(output_mask) > 0

	    # switch to evaluate mode
	    model.eval()

	    with torch.no_grad():
	        end = time.time()
	        for i, (images, target) in enumerate(val_loader):
	            if args.gpu is not None:
	                images = images.cuda(args.gpu, non_blocking=True)
	            if torch.cuda.is_available():
	                target = target.cuda(args.gpu, non_blocking=True)

	            # compute output
	            with torch.cuda.amp.autocast():
	                output = model(images)
	                if use_mask:
	                    output = output[:, output_mask]
	                loss = criterion(output, target)

	            # measure accuracy and record loss
	            acc1, acc5 = accuracy(output, target, topk=(1, 5))
	            losses.update(loss.item(), images.size(0))
	            top1.update(acc1[0], images.size(0))
	            top5.update(acc5[0], images.size(0))

	            # measure elapsed time
	            batch_time.update(time.time() - end)
	            end = time.time()

	            if i % args.print_freq == 0:
	                progress.display(i)
	        progress.display_summary()

	    return top1.avg


	import matplotlib.pyplot as plt
	def plot_img(image, save_path='saved_plot.png', target=None, predicted=None):
	    """Min-max normalize an image and save it as a small, axis-free figure.

	    Args:
	        image: A ``torch.Tensor`` (moved to CPU, squeezed, then permuted
	            with ``(1, 2, 0)``; presumably channel-first, confirm with
	            callers) or an array that ``plt.imshow`` accepts as is.
	        save_path: Destination file for the figure.
	        target: Unused; kept for the existing call signature.
	        predicted: Unused; kept for the existing call signature.
	    """
	    if isinstance(image, torch.Tensor):
	        image_array = image.to('cpu').squeeze().permute(1, 2, 0).detach().numpy()
	    else:
	        image_array = image

	    lowest = image_array.min()
	    value_range = image_array.max() - lowest
	    if value_range > 0:
	        image_array = (image_array - lowest) / value_range
	    else:
	        # A constant image would give 0/0 (NaN everywhere); show it as
	        # all zeros instead.
	        image_array = np.zeros_like(image_array, dtype=float)

	    plt.figure(figsize=(3, 3), tight_layout=True)
	    plt.imshow(image_array)
	    # title = f'Target: {target}, Pred: {predicted}'
	    plt.axis('off')
	    # plt.title(title, fontsize=10)
	    plt.savefig(save_path)
	    plt.close()

	from torchvision.transforms import ToPILImage
	from PIL import Image
	to_pil = ToPILImage()
	def plot_pil_img(image, save_path='saved_plot.png'):
	    """Save ``image`` to ``save_path``, converting it with ``to_pil`` first
	    when it is not already a PIL image."""
	    pil_image = image if isinstance(image, Image.Image) else to_pil(image)
	    pil_image.save(save_path)

	import seaborn as sns
	import matplotlib.pyplot as plt
	import numpy as np
	from scipy.stats import pearsonr

	def plot_entropy_vs_mi(
	    entropies: np.ndarray,
	    mi_values: np.ndarray,
	    agreement_diff: np.ndarray = None,
	    entropy_thresh: float = None,
	    mi_thresh: float = None,
	    figsize: tuple = (4.5, 4.5),
	    save_path: str = 'mi_vs_entropy.png',
	):
	    """
	    Plot MI vs. Predictive Entropy with optional coloring by agreement.

	    Draws a seaborn JointGrid (scatter, marginal histograms, dashed
	    regression line), titles it with the Pearson correlation of the two
	    inputs, and saves it to ``save_path``.

	    Args:
	        entropies: Entropy values (x axis); tensor or NumPy array.
	        mi_values: Mutual information values (y axis); tensor or array.
	        agreement_diff: Optional per-point values used as scatter hue.
	        entropy_thresh (float, optional): Vertical threshold line.
	        mi_thresh (float, optional): Horizontal threshold line.
	        figsize (tuple): Only ``figsize[0]`` is used, as the grid height.
	        save_path (str): Where to save the figure; its directory is
	            created if needed.
	    """
	    def to_np(x):
	        # Tensors expose .cpu(); anything else (e.g. an ndarray) is used as is.
	        return x.cpu().numpy() if hasattr(x, 'cpu') else x

	    entropies = to_np(entropies)
	    mi_values = to_np(mi_values)
	    if agreement_diff is not None:
	        agreement_diff = to_np(agreement_diff)

	    corr, _ = pearsonr(entropies, mi_values)

	    # Create joint plot
	    g = sns.JointGrid(
	        x=entropies,
	        y=mi_values,
	        height=figsize[0],
	        ratio=4,
	        space=0.15
	    )

	    # Scatter with hue if available
	    if agreement_diff is not None:
	        cmap = sns.color_palette("coolwarm", as_cmap=True)
	        g.plot_joint(
	            sns.scatterplot,
	            hue=agreement_diff,
	            palette=cmap,
	            s=18,
	            linewidth=0.3,
	            edgecolor="black",
	            alpha=0.8
	        )
	        # Drop the hue legend for a cleaner plot, if one was created.
	        legend = g.ax_joint.get_legend()
	        if legend is not None:
	            legend.remove()
	    else:
	        g.plot_joint(sns.scatterplot, s=20, color='tab:blue', alpha=0.7)

	    # Marginals
	    g.plot_marginals(sns.histplot, kde=True, color='grey', alpha=0.5)

	    # Regression
	    sns.regplot(
	        x=entropies,
	        y=mi_values,
	        scatter=False,
	        ax=g.ax_joint,
	        color='black',
	        line_kws={"linestyle": "--", "linewidth": 1}
	    )

	    # Thresholds
	    if entropy_thresh is not None:
	        g.ax_joint.axvline(entropy_thresh, ls='--', color='grey', lw=1)
	    if mi_thresh is not None:
	        g.ax_joint.axhline(mi_thresh, ls='--', color='grey', lw=1)

	    # Annotation in top-left, the important/key quadrant
	    x_text = np.percentile(entropies, 5)
	    y_text = np.percentile(mi_values, 95)
	    g.ax_joint.text(x_text, y_text, 'High MI\nLow Entropy',
	                    fontsize=10, fontweight='bold', color='black')

	    # Labels and title
	    g.set_axis_labels('Self-Entropy', 'Mutual Information', fontsize=11)
	    g.ax_joint.set_title(f'Pearson ρ = {corr:.2f}', fontsize=12)
	    g.ax_joint.tick_params(labelsize=9)

	    plt.tight_layout()
	    if os.path.dirname(save_path):
	        os.makedirs(os.path.dirname(save_path), exist_ok=True)
	    plt.savefig(save_path, dpi=300)
	    plt.close()
	    return

	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns

	# Display label for each method key (keys index the per-method results).
	method_names = dict(
	    model_ensemble='Model Ensemble',
	    wise_ft='Model Souping',
	    tcube='Entropy-based',
	    tcube_MI_bmm='Mutual Information',
	)

	def plot_delta_performance(
	    dyn_v_stat_plot: dict,
	    dyn_key: str = 'tcube_MI_bmm',
	    figsize: tuple = (3, 3),
	    save_path: str = 'delta_performance.png'
	):
	    """Bar-plot how far one method is above the best of the other methods.

	    For every entry of ``dyn_v_stat_plot['conditions']`` the bar height is
	    ``dyn_v_stat_plot[dyn_key]`` minus the element-wise maximum over the
	    remaining keys of ``method_names``.

	    Args:
	        dyn_v_stat_plot: Dict with a ``'conditions'`` entry and one score
	            sequence per key of ``method_names``.
	        dyn_key: Key of the method being compared against the others.
	        figsize: Figure size handed to ``plt.subplots``.
	        save_path: Output file; its directory is created if needed.

	    Returns:
	        tuple: ``(fig, ax)``; the figure is already closed.
	    """
	    sns.set_style('white')
	    conditions = np.array(dyn_v_stat_plot['conditions'])

	    fig, ax = plt.subplots(1, 1, figsize=figsize, constrained_layout=True)

	    # --- Δ Accuracy ---
	    dynamic_scores = np.array(dyn_v_stat_plot[dyn_key])
	    baseline_rows = [
	        dyn_v_stat_plot[name] for name in method_names if name != dyn_key
	    ]
	    best_baseline = np.vstack(baseline_rows).max(axis=0)
	    delta = dynamic_scores - best_baseline

	    bar_colors = sns.color_palette("rocket", n_colors=len(delta))
	    positions = np.arange(len(conditions))
	    ax.bar(
	        x=positions,
	        height=delta,
	        width=1.0,
	        color=bar_colors,
	        linewidth=0,
	        edgecolor=None,
	        alpha=0.85,
	    )
	    ax.axhline(0, color='grey', linewidth=1)
	    ax.set_ylabel(r'$\Delta$ (%)', fontsize=10)
	    ax.set_xlabel('Distribution Shifts', fontsize=10)

	    # One unlabeled tick per condition.
	    ax.set_xticks(np.arange(len(conditions)))
	    ax.set_xticklabels([''] * len(conditions))
	    ax.tick_params(axis='x', length=3, width=1)
	    ax.tick_params(axis='y', labelsize=9)

	    # Keep only the left and bottom borders.
	    for side, shown in (('top', False), ('right', False),
	                        ('left', True), ('bottom', True)):
	        ax.spines[side].set_visible(shown)
	    ax.grid(False)

	    target_dir = os.path.dirname(save_path)
	    if target_dir:
	        os.makedirs(target_dir, exist_ok=True)

	    fig.savefig(save_path, dpi=300, bbox_inches='tight')
	    plt.close(fig)
	    return fig, ax

	import matplotlib.pyplot as plt
	import seaborn as sns
	import torch

	def plot_lambda_histogram(
	    lambda_dict: dict,
	    bins: int = 50,
	    figsize: tuple = (3, 3),
	    save_path: str = None
	):
	    """
	    Plot a single-condition histogram of sample-wise interpolation
	    coefficients: KDE overlay as a thin dashed black line, bars colored
	    from the 'Blues' palette, outward ticks, all four borders visible.

	    Args:
	        lambda_dict (dict): one-entry dict e.g. {'clean': tensor([...])}
	        bins (int): number of histogram bins
	        figsize (tuple): figure size in inches (w, h)
	        save_path (str): optional path to save the figure; nothing is
	            written when it is None or empty

	    Returns:
	        fig, ax

	    Raises:
	        ValueError: If ``lambda_dict`` does not hold exactly one entry or
	            its value is not a ``torch.Tensor``.
	    """
	    # Validate single key
	    if len(lambda_dict) != 1:
	        raise ValueError("lambda_dict must contain exactly one key.")
	    condition, data = next(iter(lambda_dict.items()))
	    if not isinstance(data, torch.Tensor):
	        raise ValueError(f"lambda_dict['{condition}'] must be a torch.Tensor")

	    # Prepare data
	    values = data.detach().cpu().numpy().ravel()

	    # Aesthetics setup
	    sns.set_style("white")
	    fig, ax = plt.subplots(figsize=figsize)

	    # One 'Blues' shade per bin, applied to the bars below
	    cm = sns.color_palette("Blues", n_colors=(bins))

	    # Plot histogram
	    plot = sns.histplot(
	        values,
	        bins=bins,
	        ax=ax,
	        edgecolor=None,
	        alpha=0.85,
	        kde=True,
	        linewidth=0  # Set edge width to 0 for wider bars
	    )
	    if plot.lines:
	        kde_line = plot.lines[0]
	        kde_line.set_color('black')   # Set KDE line color to black
	        kde_line.set_linestyle('--')  # Set KDE line style to dashed
	        kde_line.set_linewidth(0.5)   # Set KDE line width to 0.5

	    for bin_, shade in zip(plot.patches, cm):
	        bin_.set_facecolor(shade)

	    # Labels
	    ax.set_xlabel("Coefficient", fontsize=9)
	    ax.set_ylabel("Frequency", fontsize=9)

	    # Ticks: six evenly spaced x ticks over the data range, outward marks
	    ax.set_xticks(np.round(np.linspace(values.min(), values.max(), num=6), 2))
	    ax.tick_params(axis='x', labelsize=8)
	    ax.tick_params(
	        axis='x', which='both',
	        bottom=True, top=False,
	        length=4, direction='out'
	    )
	    ax.tick_params(
	        axis='y', which='both',
	        left=True, right=False,
	        length=4, direction='out',
	        labelsize=8
	    )

	    # Make all borders visible
	    for spine in ['top', 'right', 'bottom', 'left']:
	        ax.spines[spine].set_visible(True)

	    plt.tight_layout()
	    # save_path defaults to None: only touch the filesystem when a path
	    # was actually supplied.
	    if save_path:
	        target_dir = os.path.dirname(save_path)
	        if target_dir:
	            os.makedirs(target_dir, exist_ok=True)
	        fig.savefig(save_path, dpi=300, bbox_inches="tight")
	    plt.show()
	    return fig, ax

	import os
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from scipy.stats import pearsonr

	def plot_entropy_vs_mi_by_correctness(
	    entropies: np.ndarray,
	    mi_values: np.ndarray,
	    correct_pt: np.ndarray,
	    correct_ft: np.ndarray,
	    figsize: tuple = (20, 4),
	    save_path: str = 'mi_vs_entropy_by_correctness_all.png',
	):
	    """
	    Plot the MI values against the entropy values in five joint-style
	    panels: the entire set and the four subsets given by the two
	    correctness flags (TrueTrue, TrueFalse, FalseTrue, FalseFalse).

	    Each panel has a top histogram, a violet scatter with a dashed
	    regression line, a right histogram, and the Pearson ρ of the plotted
	    points. Axis limits are the 1-99 percentiles of the panel's data and
	    tick labels are hidden.

	    Args:
	        entropies: x values, tensor or array (axis label: entropy ratio).
	        mi_values: y values, tensor or array (axis label: sigmoid of JS).
	        correct_pt: Per-sample correctness flags of the first model.
	        correct_ft: Per-sample correctness flags of the second model.
	        figsize: Size of the whole five-panel figure.
	        save_path: Output file; its directory is created if needed.
	    """
	    # helper to numpy
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else x

	    e = to_np(entropies)
	    m = to_np(mi_values)
	    # NOTE(review): the plotted y values (and the ρ printed below) are a
	    # random blend of the x values into the MI values, not the raw
	    # `mi_values`. This changes the correlation and makes the figure
	    # non-deterministic - confirm this is intended.
	    alpha = np.random.uniform(0.05, 0.1)
	    m = alpha * e + (1 - alpha) * m
	    # Force boolean flags: `~` on 0/1 integers gives -1/-2, both truthy,
	    # which would silently build wrong subsets.
	    cpt = np.asarray(to_np(correct_pt), dtype=bool)
	    cft = np.asarray(to_np(correct_ft), dtype=bool)

	    masks = {
	        'Entire Set': np.ones_like(e, dtype=bool),
	        'TrueTrue': np.logical_and(cpt, cft),
	        'TrueFalse': np.logical_and(cpt, ~cft),
	        'FalseTrue': np.logical_and(~cpt, cft),
	        'FalseFalse': np.logical_and(~cpt, ~cft),
	    }

	    fig = plt.figure(figsize=figsize)
	    # Two grid columns per panel: joint plot (wide) + right marginal.
	    gs = fig.add_gridspec(
	        2, 10,
	        width_ratios=[4, 1] * 5,
	        height_ratios=[0.2, 1],
	        wspace=0.075,
	        hspace=0.2
	    )

	    for i, (label, mask) in enumerate(masks.items()):
	        xe = e[mask]
	        ym = m[mask]
	        valid = np.isfinite(xe) & np.isfinite(ym)
	        xe, ym = xe[valid], ym[valid]
	        n_points = len(xe)

	        # clamp to remove outliers
	        if n_points > 1:
	            xlow, xhigh = np.percentile(xe, [1, 99])
	            ylow, yhigh = np.percentile(ym, [1, 99])
	        else:
	            # Too few points for percentiles: use the full-data range.
	            xlow, xhigh = np.min(e), np.max(e)
	            ylow, yhigh = np.min(m), np.max(m)

	        # Top histogram (over the scatter's x-range)
	        ax_marg_x = fig.add_subplot(gs[0, 2*i])
	        if n_points > 0:
	            sns.histplot(
	                xe, bins=25, kde=True,
	                ax=ax_marg_x, color='grey', alpha=0.4
	            )
	        ax_marg_x.set_xlim(xlow, xhigh)
	        ax_marg_x.axis('off')  # remove all spines & ticks

	        # Joint scatter
	        ax_joint = fig.add_subplot(gs[1, 2*i])
	        if n_points > 0:
	            sns.scatterplot(
	                x=xe, y=ym,
	                s=25, color='violet',
	                edgecolor='k', linewidth=0.2, alpha=0.7,
	                ax=ax_joint
	            )
	        # A regression line needs at least two points.
	        if n_points > 1:
	            sns.regplot(
	                x=xe, y=ym, scatter=False, ax=ax_joint,
	                line_kws={'linestyle':'--','color':'black','linewidth':1.25}
	            )
	        ax_joint.set_xlim(xlow, xhigh)
	        ax_joint.set_ylim(ylow, yhigh)
	        ax_joint.set_xticklabels([])
	        ax_joint.set_yticklabels([])

	        # Right histogram (over the scatter's y-range)
	        ax_marg_y = fig.add_subplot(gs[1, 2*i+1])
	        if n_points > 0:
	            sns.histplot(
	                y=ym, bins=25, kde=True,
	                ax=ax_marg_y, color='grey', alpha=0.4,
	                orientation='horizontal'
	            )
	        ax_marg_y.set_ylim(ylow, yhigh)
	        ax_marg_y.axis('off')

	        # annotate Pearson ρ
	        if n_points > 1:
	            rho, _ = pearsonr(xe, ym)
	            ax_joint.text(
	                0.05, 0.90, f"$\\rho$={rho:.2f}",
	                transform=ax_joint.transAxes,
	                fontsize=12,
	                bbox=dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.6)
	            )

	        # x label on every panel, y label only on the first one
	        ax_joint.set_xlabel(r"$\mathbf{\frac{H(P_{ft})}{H(P_{ft})+H(P_{pt})}}$", fontsize=14)
	        if i == 0:
	            ax_joint.set_ylabel(r"$\mathbf{\sigma\left(\mathrm{JS}(P_{pt},P_{ft})\right)}$", fontsize=11)

	        ax_joint.set_title(label, fontsize=14)

	    plt.tight_layout()
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    fig.savefig(save_path, dpi=300, bbox_inches='tight')
	    plt.close(fig)

	def plot_Xentropy_vs_mi_by_correctness(
	    x_entropies: np.ndarray,
	    mi_values: np.ndarray,
	    correct_pt: np.ndarray,
	    correct_ft: np.ndarray,
	    figsize: tuple = (20, 4),
	    save_path: str = 'mi_vs_entropy_by_correctness_all.png',
	):
	    """
	    Plot the MI values against the cross-entropy values in five
	    joint-style panels: the entire set and the four subsets given by the
	    two correctness flags (TrueTrue, TrueFalse, FalseTrue, FalseFalse).

	    Each panel has a top histogram, a violet scatter with a dashed
	    regression line, a right histogram, and the Pearson ρ of the plotted
	    points. Axis limits are the 1-99 percentiles of the panel's data and
	    tick labels are hidden.

	    Args:
	        x_entropies: x values, tensor or array (axis label: CE ratio).
	        mi_values: y values, tensor or array (axis label: sigmoid of JS).
	        correct_pt: Per-sample correctness flags of the first model.
	        correct_ft: Per-sample correctness flags of the second model.
	        figsize: Size of the whole five-panel figure.
	        save_path: Output file; its directory is created if needed. The
	            default matches ``plot_entropy_vs_mi_by_correctness``, so
	            pass an explicit path to keep both figures.
	    """
	    # helper to numpy
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else x

	    x_e = to_np(x_entropies)
	    m = to_np(mi_values)
	    # NOTE(review): the plotted y values (and the ρ printed below) are a
	    # random blend of the x values into the MI values, not the raw
	    # `mi_values`. This changes the correlation and makes the figure
	    # non-deterministic - confirm this is intended.
	    alpha = np.random.uniform(0.05, 0.1)
	    m = alpha * x_e + (1 - alpha) * m
	    # Force boolean flags: `~` on 0/1 integers gives -1/-2, both truthy,
	    # which would silently build wrong subsets.
	    cpt = np.asarray(to_np(correct_pt), dtype=bool)
	    cft = np.asarray(to_np(correct_ft), dtype=bool)

	    masks = {
	        'Entire Set': np.ones_like(x_e, dtype=bool),
	        'TrueTrue': np.logical_and(cpt, cft),
	        'TrueFalse': np.logical_and(cpt, ~cft),
	        'FalseTrue': np.logical_and(~cpt, cft),
	        'FalseFalse': np.logical_and(~cpt, ~cft),
	    }

	    fig = plt.figure(figsize=figsize)
	    # Two grid columns per panel: joint plot (wide) + right marginal.
	    gs = fig.add_gridspec(
	        2, 10,
	        width_ratios=[4, 1] * 5,
	        height_ratios=[0.2, 1],
	        wspace=0.075,
	        hspace=0.2
	    )

	    for i, (label, mask) in enumerate(masks.items()):
	        xe = x_e[mask]
	        ym = m[mask]
	        valid = np.isfinite(xe) & np.isfinite(ym)
	        xe, ym = xe[valid], ym[valid]
	        n_points = len(xe)

	        # clamp to remove outliers
	        if n_points > 1:
	            xlow, xhigh = np.percentile(xe, [1, 99])
	            ylow, yhigh = np.percentile(ym, [1, 99])
	        else:
	            # Too few points for percentiles: use the full-data range.
	            xlow, xhigh = np.min(x_e), np.max(x_e)
	            ylow, yhigh = np.min(m), np.max(m)

	        # Top histogram (over the scatter's x-range)
	        ax_marg_x = fig.add_subplot(gs[0, 2*i])
	        if n_points > 0:
	            sns.histplot(
	                xe, bins=25, kde=True,
	                ax=ax_marg_x, color='grey', alpha=0.4
	            )
	        ax_marg_x.set_xlim(xlow, xhigh)
	        ax_marg_x.axis('off')  # remove all spines & ticks

	        # Joint scatter
	        ax_joint = fig.add_subplot(gs[1, 2*i])
	        if n_points > 0:
	            sns.scatterplot(
	                x=xe, y=ym,
	                s=25, color='violet',
	                edgecolor='k', linewidth=0.2, alpha=0.7,
	                ax=ax_joint
	            )
	        # A regression line needs at least two points.
	        if n_points > 1:
	            sns.regplot(
	                x=xe, y=ym, scatter=False, ax=ax_joint,
	                line_kws={'linestyle':'--','color':'black','linewidth':1.25}
	            )
	        ax_joint.set_xlim(xlow, xhigh)
	        ax_joint.set_ylim(ylow, yhigh)
	        ax_joint.set_xticklabels([])
	        ax_joint.set_yticklabels([])

	        # Right histogram (over the scatter's y-range)
	        ax_marg_y = fig.add_subplot(gs[1, 2*i+1])
	        if n_points > 0:
	            sns.histplot(
	                y=ym, bins=25, kde=True,
	                ax=ax_marg_y, color='grey', alpha=0.4,
	                orientation='horizontal'
	            )
	        ax_marg_y.set_ylim(ylow, yhigh)
	        ax_marg_y.axis('off')

	        # annotate Pearson ρ
	        if n_points > 1:
	            rho, _ = pearsonr(xe, ym)
	            ax_joint.text(
	                0.05, 0.90, f"$\\rho$={rho:.2f}",
	                transform=ax_joint.transAxes,
	                fontsize=12,
	                bbox=dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.6)
	            )

	        # x label on every panel, y label only on the first one
	        ax_joint.set_xlabel(r"$\mathbf{\frac{CE(P_{ft},Y)}{CE(P_{ft},Y)+CE(P_{pt},Y)}}$", fontsize=14)
	        if i == 0:
	            ax_joint.set_ylabel(r"$\mathbf{\sigma\left(\mathrm{JS}(P_{pt},P_{ft})\right)}$", fontsize=11)

	        ax_joint.set_title(label, fontsize=14)

	    plt.tight_layout()
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    fig.savefig(save_path, dpi=300, bbox_inches='tight')
	    plt.close(fig)

	def plot_xentropy_vs_mi_entire(
	    x_entropies: np.ndarray,
	    mi_values: np.ndarray,
	    figsize: tuple = (5, 5),
	    save_path: str = 'xent_vs_mi_entire.png',
	):
	    """
	    Plot a single JointGrid-style panel of the MI values against the
	    cross-entropy values for the entire set.

	    Top histogram, central scatter+regression, and right histogram.
	    Axis limits are the 1-99 percentiles, histograms are grey and the
	    scatter violet, Pearson ρ is shown inside the joint, no tick labels.

	    Args:
	        x_entropies: x values, tensor or array (axis label: CE ratio).
	        mi_values: y values, tensor or array (axis label: sigmoid of JS).
	        figsize: Figure size.
	        save_path: Output file; its directory is created if needed.

	    Raises:
	        ValueError: If no finite (x, y) pair is left to plot.
	    """
	    # Convert to numpy if needed
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else x

	    xe = to_np(x_entropies)
	    ym = to_np(mi_values)
	    # NOTE(review): the plotted y values (and the ρ printed below) are a
	    # random blend of the x values into the MI values, not the raw
	    # `mi_values`. This changes the correlation and makes the figure
	    # non-deterministic - confirm this is intended.
	    alpha = np.random.uniform(0.05, 0.1)
	    ym = alpha * xe + (1 - alpha) * ym

	    # Filter finite
	    mask = np.isfinite(xe) & np.isfinite(ym)
	    xe, ym = xe[mask], ym[mask]
	    n_points = len(xe)
	    if n_points == 0:
	        # Fail with a clear message instead of a zero-size reduction
	        # error from np.min further down.
	        raise ValueError("no finite (x, y) pairs to plot")

	    # Clamp to 1-99 percentile to remove outliers
	    if n_points > 1:
	        xlow, xhigh = np.percentile(xe, [1, 99])
	        ylow, yhigh = np.percentile(ym, [1, 99])
	    else:
	        xlow, xhigh = np.min(xe), np.max(xe)
	        ylow, yhigh = np.min(ym), np.max(ym)

	    # Set up figure & gridspec: 2 rows, 2 cols (width ratios 4:1, height ratios 0.2:1)
	    fig = plt.figure(figsize=figsize)
	    gs = fig.add_gridspec(
	        2, 2,
	        width_ratios=[4, 1],
	        height_ratios=[0.2, 1],
	        wspace=0.05,
	        hspace=0.05
	    )

	    # Top histogram
	    ax_marg_x = fig.add_subplot(gs[0, 0])
	    sns.histplot(
	        xe, bins=25, kde=True,
	        ax=ax_marg_x, color='grey', alpha=0.4
	    )
	    ax_marg_x.set_xlim(xlow, xhigh)
	    ax_marg_x.axis('off')

	    # Joint scatter + regression
	    ax_joint = fig.add_subplot(gs[1, 0])
	    sns.scatterplot(
	        x=xe, y=ym,
	        s=25, color='violet',
	        edgecolor='k', linewidth=0.2, alpha=0.7,
	        ax=ax_joint
	    )
	    # A regression line needs at least two points.
	    if n_points > 1:
	        sns.regplot(
	            x=xe, y=ym, scatter=False, ax=ax_joint,
	            line_kws={'linestyle':'--','color':'black','linewidth':1.25}
	        )
	    ax_joint.set_xlim(xlow, xhigh)
	    ax_joint.set_ylim(ylow, yhigh)
	    ax_joint.set_xticklabels([])
	    ax_joint.set_yticklabels([])

	    # Right histogram
	    ax_marg_y = fig.add_subplot(gs[1, 1])
	    sns.histplot(
	        y=ym, bins=25, kde=True,
	        ax=ax_marg_y, color='grey', alpha=0.4,
	        orientation='horizontal'
	    )
	    ax_marg_y.set_ylim(ylow, yhigh)
	    ax_marg_y.axis('off')

	    # Annotate Pearson ρ
	    if n_points > 1:
	        rho, _ = pearsonr(xe, ym)
	        ax_joint.text(
	            0.05, 0.90, f"$\\rho$ = {rho:.2f}",
	            transform=ax_joint.transAxes,
	            fontsize=10,
	            bbox=dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.6)
	        )

	    ax_joint.set_xlabel(r"$\mathbf{\frac{CE(P_{ft},Y)}{CE(P_{ft},Y)+CE(P_{pt},Y)}}$", fontsize=14)
	    ax_joint.set_ylabel(r"$\mathbf{\sigma\left(\mathrm{JS}(P_{pt},P_{ft})\right)}$", fontsize=11)

	    plt.tight_layout()
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    fig.savefig(save_path, dpi=300, bbox_inches='tight')
	    plt.close(fig)

	import os
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns

	def plot_stacked_ce_vs_mi_bins(
	    mi_values,
	    ce_values_pt,
	    ce_values_ft,
	    bins: int = 12,
	    figsize: tuple = (10, 5),
	    save_path: str = 'ce_vs_mi_stacked_bins.png',
	):
	    """
	    Plot stacked average cross-entropy CE for pretrained and fine-tuned
	    models as a function of binned Mutual Information. The lower bars use
	    the 'Reds' palette and the stacked upper bars the 'Blues' palette.

	    MI values are min-max normalized to [0, 1] before binning, so the bin
	    labels refer to normalized MI.

	    Args:
	        mi_values (array-like): Mutual information per sample.
	        ce_values_pt (array-like): Cross-entropy for pretrained model per sample.
	        ce_values_ft (array-like): Cross-entropy for fine-tuned model per sample.
	        bins (int): Number of bins.
	        figsize (tuple): Figure size.
	        save_path (str): Path to save the plot.

	    Raises:
	        ValueError: If ``mi_values`` is empty.
	    """
	    # Convert to numpy
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else np.asarray(x)

	    mi = to_np(mi_values).ravel()
	    if mi.size == 0:
	        raise ValueError("mi_values is empty")
	    mi_low = mi.min()
	    mi_range = mi.max() - mi_low
	    if mi_range > 0:
	        mi = (mi - mi_low) / mi_range
	    else:
	        # All MI values identical: normalizing would be 0/0. Put every
	        # sample at 0 so it lands in the first bin.
	        mi = np.zeros_like(mi, dtype=float)
	    ce_pt = to_np(ce_values_pt).ravel()
	    ce_ft = to_np(ce_values_ft).ravel()

	    # Bin edges and digitize (normalized MI spans exactly [0, 1])
	    edges = np.linspace(0.0, 1.0, bins + 1)
	    bin_idx = np.digitize(mi, edges, right=True) - 1
	    bin_idx = np.clip(bin_idx, 0, bins - 1)

	    # Compute mean CE per bin for both models (NaN for empty bins)
	    mean_pt = []
	    mean_ft = []
	    for i in range(bins):
	        mask = (bin_idx == i)
	        has_samples = mask.any()
	        mean_pt.append(ce_pt[mask].mean() if has_samples else np.nan)
	        mean_ft.append(ce_ft[mask].mean() if has_samples else np.nan)

	    # Prepare labels
	    labels = [f"({edges[i]:.2f},{edges[i+1]:.2f}]" for i in range(bins)]

	    # Colors
	    bottom_colors = sns.color_palette("Reds", bins)
	    top_colors = sns.color_palette("Blues", bins)

	    # Plot
	    plt.figure(figsize=figsize)
	    x = np.arange(bins)
	    plt.bar(x, mean_pt, color=bottom_colors, label='CE Pretrained')
	    plt.bar(x, mean_ft, bottom=mean_pt, color=top_colors, label='CE Fine-tuned')

	    # Labels and aesthetics
	    plt.xticks(x, labels, rotation=45, ha='right', fontsize=10)
	    plt.xlabel("Mutual Information Bins", fontsize=12)
	    plt.ylabel("Cross-Entropy Loss (CE)", fontsize=12)
	    plt.legend(loc='upper right')
	    sns.despine(trim=True)
	    plt.tight_layout()

	    # Save
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    plt.savefig(save_path, dpi=300)
	    plt.close()

	import os
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from scipy.stats import pearsonr

	def plot_ce_vs_mi_by_correctness(
	    ce_pt: np.ndarray,
	    ce_ft: np.ndarray,
	    mi_values: np.ndarray,
	    correct_pt: np.ndarray,
	    correct_ft: np.ndarray,
	    figsize: tuple = (20, 4),
	    save_path: str = 'ce_vs_mi_by_correctness.png',
	):
	    """
	    Plot CE vs. Mutual Information across 5 subsets: All, TT, TF, FT, FF.
	    For each panel: red scatter/regression for pretrained CE vs. MI,
	    blue scatter/regression for fine-tuned CE vs. MI. Annotate Pearson ρ_pt and ρ_ft.

	    Args:
	        ce_pt: Per-sample cross-entropy of the pretrained model.
	        ce_ft: Per-sample cross-entropy of the fine-tuned model.
	        mi_values: Per-sample mutual information (y axis).
	        correct_pt: Per-sample correctness flags of the pretrained model.
	        correct_ft: Per-sample correctness flags of the fine-tuned model.
	        figsize: Size of the whole five-panel figure.
	        save_path: Output file; its directory is created if needed.
	    """
	    # helper to numpy
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else x

	    ce_pt = to_np(ce_pt)
	    ce_ft = to_np(ce_ft)
	    mi = to_np(mi_values)
	    # Force boolean flags: `~` on 0/1 integers gives -1/-2, both truthy,
	    # which would silently build wrong subsets.
	    cpt = np.asarray(to_np(correct_pt), dtype=bool)
	    cft = np.asarray(to_np(correct_ft), dtype=bool)

	    masks = {
	        'All': np.ones_like(mi, dtype=bool),
	        'TrueTrue': np.logical_and(cpt, cft),
	        'TrueFalse': np.logical_and(cpt, ~cft),
	        'FalseTrue': np.logical_and(~cpt, cft),
	        'FalseFalse':np.logical_and(~cpt, ~cft),
	    }

	    # colors
	    color_pt = 'tab:red'
	    color_ft = 'tab:blue'

	    fig, axs = plt.subplots(1, 5, figsize=figsize, sharey=False)
	    for ax, (label, mask) in zip(axs, masks.items()):
	        x_pt = ce_pt[mask]
	        x_ft = ce_ft[mask]
	        y = mi[mask]
	        # Regression and correlation need at least two points.
	        enough_pt = len(x_pt) > 1
	        enough_ft = len(x_ft) > 1

	        # plot pretrained CE vs MI
	        ax.scatter(x_pt, y, c=color_pt, s=20, alpha=0.7, edgecolor='k', linewidth=0.2)
	        if enough_pt:
	            sns.regplot(x=x_pt, y=y, scatter=False, ax=ax,
	                        line_kws={'color':color_pt, 'linestyle':'--', 'linewidth':1.5})

	        # plot fine-tuned CE vs MI
	        ax.scatter(x_ft, y, c=color_ft, s=20, alpha=0.7, edgecolor='k', linewidth=0.2)
	        if enough_ft:
	            sns.regplot(x=x_ft, y=y, scatter=False, ax=ax,
	                        line_kws={'color':color_ft, 'linestyle':'--', 'linewidth':1.5})

	        # compute and annotate Pearson correlations
	        if enough_pt:
	            rho_pt, _ = pearsonr(x_pt, y)
	            ax.text(0.05, 0.90, f"$\\rho_{{pt}}={rho_pt:.2f}$",
	                    transform=ax.transAxes, color=color_pt,
	                    fontsize=10, bbox=dict(boxstyle="round,pad=0.2", fc="white", alpha=0.6, ec="none"))
	        if enough_ft:
	            rho_ft, _ = pearsonr(x_ft, y)
	            ax.text(0.05, 0.80, f"$\\rho_{{ft}}={rho_ft:.2f}$",
	                    transform=ax.transAxes, color=color_ft,
	                    fontsize=10, bbox=dict(boxstyle="round,pad=0.2", fc="white", alpha=0.6, ec="none"))

	        ax.set_title(label, fontsize=12)
	        # x label on every panel, y label only on the 'All' panel
	        ax.set_xlabel('Cross-Entropy Error', fontsize=11)
	        if label == 'All':
	            ax.set_ylabel('Mutual Information (JSD)', fontsize=11)
	        else:
	            ax.set_ylabel('')

	        ax.tick_params(labelsize=9)

	    plt.tight_layout()
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    fig.savefig(save_path, dpi=300)
	    plt.close(fig)


	import torch
	import matplotlib.pyplot as plt
	from torchvision.utils import make_grid

	# def plot_case_study_mosaic(
	# clip_pt, clip_ft, dataloader, args,
	# n_per_cat=5,
	# figsize=(12, 8),
	# save_path=None
	# ):
	# """
	# Build a mosaic with 4 rows (TT, TF, FT, FF) and n_per_cat columns,
	# showing original image, GT label, PT pred, FT pred.
	# """
	# device=f'cuda:{args.gpu}'
	# # 1) Collect all images & labels
	# imgs, labels = [], []
	# for x, y in dataloader:
	# imgs.append(x)
	# labels.append(y)
	# imgs = torch.cat(imgs, dim=0).to(device) # (N, C, H, W)
	# labels = torch.cat(labels, dim=0).squeeze().to(device) # (N,)

	# # 2) Run both models to get logits
	# clip_pt.eval(); clip_ft.eval()
	# with torch.no_grad():
	# logits_pt, _ = get_logits(clip_pt, dataloader, args, return_feats=False)
	# logits_ft, _ = get_logits(clip_ft, dataloader, args, return_feats=False)

	# # 3) Compute predictions and correctness masks
	# p_pt = torch.softmax(logits_pt, dim=1)
	# p_ft = torch.softmax(logits_ft, dim=1)
	# pred_pt = p_pt.argmax(dim=1)
	# pred_ft = p_ft.argmax(dim=1)
	# correct_pt = pred_pt.eq(labels)
	# correct_ft = pred_ft.eq(labels)

	# # 4) Define categories
	# cats = {
	# 'TT': correct_pt & correct_ft,
	# 'TF': correct_pt & ~correct_ft,
	# 'FT': ~correct_pt & correct_ft,
	# 'FF': ~correct_pt & ~correct_ft
	# }

	# # 5) Sample up to n_per_cat indices per category
	# selected = {}
	# for name, mask in cats.items():
	# idxs = mask.nonzero(as_tuple=True)[0]
	# if len(idxs) == 0:
	# selected[name] = []
	# else:
	# selected[name] = idxs[:n_per_cat]

	# # 6) Build the mosaic
	# fig, axes = plt.subplots(4, n_per_cat, figsize=figsize)
	# for row, (name, idxs) in enumerate(selected.items()):
	# for col in range(n_per_cat):
	# ax = axes[row, col]
	# ax.axis('off')
	# if col < len(idxs):
	# idx = idxs[col].item()
	# img = imgs[idx].cpu().permute(1, 2, 0).numpy()
	# # if normalized, denormalize here...
	# ax.imshow(img)
	# gt = labels[idx].item()
	# pt = pred_pt[idx].item()
	# ft = pred_ft[idx].item()
	# ax.set_title(f"{name}\nGT:{gt} PT:{pt} FT:{ft}", fontsize=8)
	# else:
	# ax.set_facecolor('lightgray')

	# plt.tight_layout()
	# os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	# fig.savefig(save_path, dpi=300)
	# plt.close(fig)


	import os
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from matplotlib.ticker import MaxNLocator, FormatStrFormatter


	def js_divergence(p: np.ndarray, q: np.ndarray) -> float:
	    """
	    Jensen-Shannon divergence of two probability vectors.

	    Computed as the mean of KL(p || m) and KL(q || m) with
	    m = (p + q) / 2, using natural logarithms. Every term is clipped to
	    [1e-12, 1] first so that no logarithm or quotient sees a zero.
	    """
	    tiny = 1e-12
	    mixture = 0.5 * (p + q)

	    p_clipped = np.clip(p, tiny, 1)
	    q_clipped = np.clip(q, tiny, 1)
	    mix_clipped = np.clip(mixture, tiny, 1)

	    kl_p_mix = np.sum(p_clipped * np.log(p_clipped / mix_clipped))
	    kl_q_mix = np.sum(q_clipped * np.log(q_clipped / mix_clipped))
	    return 0.5 * (kl_p_mix + kl_q_mix)


	def plot_confidence_vs_js(
	    P_pt: np.ndarray,
	    P_ft: np.ndarray,
	    save_path: str
	) -> None:
	    """
	    Plot combined confidence vs. JS divergence for two sets of model predictions,
	    with dashed threshold lines at the lowest combined confidence and the
	    lowest JS divergence found among the samples where the models disagree.
	    The lines are omitted when the two models never disagree.

	    Args:
	        P_pt (np.ndarray): Pre-trained model probabilities, shape (N, C).
	        P_ft (np.ndarray): Fine-tuned model probabilities, shape (N, C).
	        save_path (str): File path where the figure will be saved.
	    """
	    def to_np(x):
	        return x.cpu().numpy() if hasattr(x, 'cpu') else np.asarray(x)

	    # Convert to numpy
	    P_pt = to_np(P_pt)
	    P_ft = to_np(P_ft)

	    # Compute combined confidence
	    conf_pt = P_pt.max(axis=1)
	    conf_ft = P_ft.max(axis=1)
	    combined_confidence = 0.5 * (conf_pt + conf_ft)

	    # Compute JS divergence for each sample
	    js_values = np.array([js_divergence(p_row, q_row) for p_row, q_row in zip(P_pt, P_ft)])

	    # Determine agreement vs. disagreement
	    agree = np.argmax(P_pt, axis=1) == np.argmax(P_ft, axis=1)
	    disagree = ~agree
	    has_disagreement = bool(disagree.any())

	    # Prepare colors
	    disagree_color = sns.color_palette("Blues", 2)[1]  # dark blue
	    agree_color = "violet"

	    # Set up figure
	    fig, ax = plt.subplots(figsize=(5, 5))

	    # Scatter
	    ax.scatter(
	        combined_confidence[agree], js_values[agree],
	        marker='o', s=250, label='Agreement', color=agree_color,
	        edgecolor='k', linewidth=0.75, alpha=0.5
	    )
	    ax.scatter(
	        combined_confidence[disagree], js_values[disagree],
	        marker='P', s=250, label='Disagreement', color=disagree_color,
	        edgecolor='k', linewidth=0.75, alpha=0.5
	    )

	    # Threshold lines at the first disagreement boundary. With no
	    # disagreement there is no boundary (and .min() of an empty
	    # selection would raise), so the lines are skipped.
	    if has_disagreement:
	        conf_thresh = combined_confidence[disagree].min()
	        js_thresh = js_values[disagree].min()
	        ax.axvline(x=conf_thresh, linestyle='--', color='gray')
	        ax.axhline(y=js_thresh, linestyle='--', color='gray')

	    # Axis limits with margin
	    x_min, x_max = combined_confidence.min(), combined_confidence.max()
	    y_min, y_max = js_values.min(), js_values.max()
	    x_margin = (x_max - x_min) * 0.05
	    y_margin = (y_max - y_min) * 0.05
	    ax.set_xlim(x_min - x_margin, x_max + x_margin)
	    ax.set_ylim(y_min - y_margin, y_max + y_margin)
	    # ax.set_aspect('equal', 'box')
	    ax.xaxis.set_major_locator(MaxNLocator(6))
	    ax.yaxis.set_major_locator(MaxNLocator(6))
	    ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
	    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

	    # Aesthetics: no inner grid, outside ticks
	    ax.set_facecolor('white')
	    ax.xaxis.set_tick_params(which='both', bottom=True, top=False, labelbottom=True, labelsize=13)
	    ax.yaxis.set_tick_params(which='both', left=True, right=False, labelleft=True, labelsize=13)
	    for spine in ax.spines.values():
	        spine.set_visible(True)

	    # Axis labels with bold mathbf and larger font
	    ax.set_xlabel(r'$\mathbf{Combined\ Confidence\ }$'+"\n"+r'$\mathbf{=\ \frac{1}{2}(\max_i\ p_{pt}^{(i)}\ +\ \max_i\ p_{ft}^{(i)})}$', fontsize=13)
	    ax.set_ylabel(r'$\mathbf{Divergence\ }$'+"\n"+r'$\mathbf{=\ \frac{1}{2}[KL(P_{pt}\\|M)\ +\ KL(P_{ft}\\|M)]}$', fontsize=13)

	    # Legend with larger font
	    ax.legend(fontsize=12, frameon=False, loc='best')

	    # Ensure directory exists and save
	    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
	    fig.savefig(save_path, dpi=300, bbox_inches='tight')
	    plt.close(fig)