| import random |
|
|
| import torch |
| import numpy as np |
|
|
| import torch.nn.functional as F |
|
|
| from src.PeptiVerse.inference import PeptiVersePredictor |
| from src.utils.model_utils import _print |
|
|
|
|
class MadSBMSampler:
    """Reverse-time sampler for MadSBM masked discrete diffusion.

    Starting from a (partially) masked token sequence, each step queries the
    model for tilted rates and denoising logits, decides per position whether
    a jump fires within the step, and fills jumped masked positions with
    nucleus-sampled candidate tokens.  Optionally, a PeptiVerse predictor is
    used both for binding-affinity guidance during sampling and for scoring
    the final sequence.
    """

    def __init__(self, model, config, device, guidance=None):
        """Store model/config handles and (optionally) build the PeptiVerse
        binding-affinity predictor used for guidance and final scoring.
        """
        self.config = config
        self.device = device
        self.model = model
        self.tokenizer = model.tokenizer
        self.mask_id = self.tokenizer.mask_token_id
        # Smallest time the time-embedding supports; sampling runs from
        # (1 - eps) down to eps.
        self.eps = config.time_embed.min_time
        self.seed_everything(seed=42)

        # Always define both attributes so sample() can test them safely.
        # Previously they were only set when `guidance` was truthy, which made
        # the unconditional peptiverse call at the end of sample() raise
        # AttributeError for every unguided sampler.
        self.guidance = guidance
        self.peptiverse = None
        if guidance:
            self.peptiverse = PeptiVersePredictor(
                manifest_path="/scratch/pranamlab/sgoel/MadSBM/src/PeptiVerse/best_models.txt",
                classifier_weight_root="/scratch/pranamlab/sgoel/MadSBM/src/PeptiVerse",
                device=self.device
            )

    @torch.inference_mode()
    def sample(self, xt, num_steps, tracer, target_toks=None, guidance=None):
        """Sample a single sequence by running `num_steps` reverse-time steps.

        Args:
            xt: (1, L) LongTensor of token ids; masked positions hold
                ``self.mask_id``.  Cloned internally; the caller's tensor is
                not mutated.
            num_steps: number of discrete reverse-time steps.
            tracer: logger exposing ``log_step(xt=..., step_idx=...)``.
            target_toks: target token ids forwarded to PeptiVerse
                (needed for guidance and for final affinity scoring).
            guidance: when truthy, masked positions are filled with
                affinity-reweighted candidates via ``binding_guidance``.

        Returns:
            (xt, binding_affin): the completed sequence and its predicted
            binding affinity, or ``None`` when no PeptiVerse predictor was
            constructed.

        Raises:
            RuntimeError: if ``guidance`` is requested but the sampler was
                built without a PeptiVerse predictor.
        """
        if guidance and self.peptiverse is None:
            raise RuntimeError(
                "guidance sampling requires constructing the sampler with guidance enabled"
            )

        xt = xt.clone()
        B, L = xt.shape
        assert B == 1, "Do only 1 sequence at a time"

        t_max = 1.0 - self.eps
        dt = 1.0 / num_steps
        attn_mask = torch.ones_like(xt, device=self.device)

        # Action-functional trajectory (diagnostics only; not returned).
        action_traj = {}
        tot_action = 0.0

        tracer.log_step(xt=xt, step_idx=0)

        # Step index at which the last mask was resolved (diagnostics only).
        converge_idx = num_steps
        converged = False
        # Logits from the final step, used to greedily resolve leftovers.
        # Initialized so num_steps == 0 no longer raises NameError below.
        final_logits = None

        for k in range(num_steps):
            # Time decreases linearly from just below t_max down to eps.
            prog = (k + 1) / float(num_steps)
            t_val = t_max - (t_max - self.eps) * prog
            t = torch.full((B,), fill_value=float(t_val), device=self.device)

            outs = self.model(input_ids=xt, attention_mask=attn_mask, t=t)

            u_tilt = outs['dit']
            total_logits = outs['madsbm']
            esm_logits = outs['esm']

            # Action uses the ESM reference rates unless the ablation flag is
            # set, in which case a uniform reference is used instead.
            if self.config.model.ablate:
                actional = self.compute_action(u_tilt, esm_logits=None)
            else:
                actional = self.compute_action(u_tilt, esm_logits=esm_logits)

            action_traj[f"action_step_{k+1}"] = actional
            tot_action += (actional * dt)

            # Per-position probability that at least one jump fires within dt:
            # 1 - exp(-R_tot * jump_scale * dt); exponent clamped for stability.
            r_theta = torch.exp(u_tilt * self.config.sampling.rate_scale)
            R_tot = r_theta.sum(dim=-1)
            rate = (- R_tot * self.config.sampling.jump_scale * dt).clamp(min=-40.0, max=0.0)
            jump_prob = 1.0 - torch.exp(rate)

            # Temperature scaling + nucleus filtering of the denoising logits.
            logits = total_logits.clone()
            logits /= self.config.sampling.tau
            logits = self.top_p_filter(logits, self.config.sampling.top_p)

            probs = F.softmax(logits, dim=-1)
            probs = probs.view(-1, probs.size(-1))
            sample = torch.multinomial(probs, 1)
            candidate_toks = sample.view(B, L)

            # Only still-masked positions whose jump fired get updated.
            rand = torch.rand(B, L, device=self.device)
            can_jump = (rand < jump_prob)
            updatable = can_jump & self.is_masked(xt)

            if guidance:
                chosen_candidate = self.binding_guidance(probs, target_toks, B, L)
                xt[updatable] = chosen_candidate[updatable]
            else:
                xt[updatable] = candidate_toks[updatable]

            tracer.log_step(xt=xt, step_idx=k + 1)

            if k == num_steps - 1:
                final_logits = total_logits

            if not converged and not self.is_masked(xt).any():
                converge_idx = k + 1
                converged = True

        # Greedily resolve any position that never jumped, using the logits
        # from the final step.
        still_masked = self.is_masked(xt)
        if still_masked.any() and final_logits is not None:
            final_toks = final_logits.argmax(dim=-1)
            xt[still_masked] = final_toks[still_masked]

        tracer.log_step(xt, num_steps + 1)

        # Score the finished sequence only when a predictor is available;
        # previously this call ran unconditionally and crashed unguided runs.
        binding_affin = None
        if self.peptiverse is not None:
            binding_affin = self.peptiverse.predict_binding_affinity(
                mode='wt',
                target_ids=target_toks,
                binder_ids=xt
            )['affinity']

        return xt, binding_affin

    def binding_guidance(self, probs, target_toks, B, L):
        """Draw M candidate fills and pick one with probability proportional
        to softmax(affinity / tau), as scored by PeptiVerse.

        Args:
            probs: (B*L, V) per-position categorical distributions.
            target_toks: target ids forwarded to the affinity predictor.
            B, L: batch size and sequence length used to reshape draws.

        Returns:
            (B, L) LongTensor — the chosen candidate fill.
        """
        M = self.config.sampling.M
        candidate_toks = []
        affinities = []

        # Draw all candidates first, then score, so the torch RNG stream for
        # the draws is independent of any randomness inside the predictor.
        for _ in range(M):
            ith_sample = torch.multinomial(probs, 1).view(B, L)
            candidate_toks.append(ith_sample)

        for toks in candidate_toks:
            pred = self.peptiverse.predict_binding_affinity(
                mode='wt',
                target_ids=target_toks,
                binder_ids=toks.detach()
            )['affinity']
            affinities.append(pred)

        affinities = torch.tensor(affinities, dtype=torch.float32)
        weights = F.softmax(affinities / self.config.sampling.tau, dim=0)
        chosen_idx = torch.multinomial(weights, 1).item()

        return candidate_toks[chosen_idx]

    def compute_action(self, u_tilt, esm_logits=None):
        """Computes the action functional for evals.

        Uses psi(u) = exp(u) - u - 1 weighted by reference rates R0: the ESM
        softmax when `esm_logits` is given, otherwise a uniform 1/vocab_size
        reference.  Returns the scalar mean over tokens as a Python float.
        """
        if esm_logits is not None:
            R0 = torch.softmax(esm_logits, dim=-1)
        else:
            R0 = 1.0 / self.tokenizer.vocab_size

        psi_u = torch.exp(u_tilt) - u_tilt - 1.0
        action_per_tok = (R0 * psi_u).sum(dim=-1)

        return action_per_tok.mean().item()

    def top_p_filter(self, logits, p_val):
        """
        Implementation of nucleus / top-p sampling.
        Masks out (sets to -inf) tokens that fall outside the smallest set
        whose cumulative probability exceeds `p_val`; the single most likely
        token always survives.  NOTE: `logits` is modified in place and
        returned (callers pass a clone).
        """
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        sorted_idx_to_remove = cum_probs > p_val

        # Shift right so the first token crossing the threshold is kept.
        sorted_idx_to_remove[..., 1:] = sorted_idx_to_remove[..., :-1].clone()
        sorted_idx_to_remove[..., 0] = 0

        # Scatter the removal mask back to the original (unsorted) order;
        # sorted_indices is a full permutation, so every entry is overwritten.
        idx_to_remove = sorted_idx_to_remove.scatter(-1, sorted_indices, sorted_idx_to_remove)
        logits[idx_to_remove] = float('-inf')
        return logits

    def is_masked(self, xt):
        """Boolean tensor marking positions that still hold the mask token."""
        return (xt == self.mask_id)

    def seed_everything(self, seed):
        """Seed python, numpy, and torch RNGs; no-op when seed is None.
        Also forces deterministic cuDNN kernels (slower but reproducible).
        """
        if seed is None:
            return
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
|
|