AE-Shree commited on
Commit
f3e4ffb
Β·
1 Parent(s): 28046a7

Deploy BioStack RLHF Medical Demo

Browse files
Files changed (1) hide show
  1. server.py +340 -398
server.py CHANGED
@@ -2,7 +2,6 @@ import io
2
  import torch
3
  import torch.nn as nn
4
  import timm
5
- import pickle
6
  import traceback
7
  import os
8
  from PIL import Image
@@ -13,7 +12,7 @@ from transformers import T5ForConditionalGeneration, T5Tokenizer
13
  from huggingface_hub import hf_hub_download
14
 
15
  # ─────────────────────────────────────────────────────────────────────────────
16
- # CONFIGURATION
17
  # ─────────────────────────────────────────────────────────────────────────────
18
  CONFIG = {
19
  'coatnet_model': 'coatnet_1_rw_224',
@@ -21,6 +20,8 @@ CONFIG = {
21
  'img_emb_dim': 768,
22
  'train_last_stages': 2,
23
  'image_size': 224,
 
 
24
  }
25
 
26
  # ─────────────────────────────────────────────────────────────────────────────
@@ -30,18 +31,17 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
  print(f"πŸ–₯️ Using device: {device}")
31
 
32
  # ─────────────────────────────────────────────────────────────────────────────
33
- # SECTION 7: Load Tokenizer and Image Transform
34
  # ─────────────────────────────────────────────────────────────────────────────
35
-
36
  print("\n" + "="*80)
37
- print("LOADING TOKENIZER AND IMAGE TRANSFORM")
38
  print("="*80)
39
-
40
- # Load tokenizer
41
  tokenizer = T5Tokenizer.from_pretrained(CONFIG['t5_model'])
42
  print(f"βœ“ Loaded tokenizer: {CONFIG['t5_model']}")
43
 
44
- # Define image transform
 
 
45
  transform = transforms.Compose([
46
  transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
47
  transforms.ToTensor(),
@@ -52,487 +52,429 @@ transform = transforms.Compose([
52
  ])
53
  print(f"βœ“ Image transform defined (size: {CONFIG['image_size']}x{CONFIG['image_size']})")
54
 
55
- def preprocess_image(image_path: str) -> torch.Tensor:
56
- """Load and preprocess image."""
57
- image = Image.open(image_path).convert('RGB')
58
- return transform(image)
59
-
60
  # ─────────────────────────────────────────────────────────────────────────────
61
- # ARCHITECTURE 1 β€” CoAtNet Encoder (shared by all three models)
62
- # Matches BOTH notebooks exactly.
63
  # ─────────────────────────────────────────────────────────────────────────────
64
  class CoAtNetEncoder(nn.Module):
65
- def __init__(self, model_name=None, pretrained=False, train_last_stages=None):
66
  super().__init__()
67
- # Use CONFIG defaults if not specified
68
- model_name = model_name or CONFIG['coatnet_model']
69
- train_last_stages = train_last_stages or CONFIG['train_last_stages']
70
-
71
- # pretrained=False at inference time β€” weights come from .pt file
72
- self.backbone = timm.create_model(model_name, pretrained=pretrained)
73
-
74
- for name, param in self.backbone.named_parameters():
75
- param.requires_grad = False
76
- for i in range(5 - train_last_stages, 5):
77
- if f"stages.{i}" in name:
78
- param.requires_grad = True
79
- break
80
 
81
- # Detect feature_dim dynamically (same as RM/PPO notebook Cell 4)
82
- with torch.no_grad():
83
- dummy = torch.randn(1, 3, 224, 224)
84
- features = self.backbone.forward_features(dummy)
85
- if len(features.shape) == 4:
86
- features = features.mean(dim=[2, 3])
87
- self.feature_dim = features.shape[-1]
88
 
89
- print(f" CoAtNetEncoder feature_dim = {self.feature_dim}")
 
 
 
 
 
90
 
91
  def forward(self, x):
92
- features = self.backbone.forward_features(x)
93
- if len(features.shape) == 4:
94
- features = features.mean(dim=[2, 3])
95
- return features
96
 
97
 
98
  # ─────────────────────────────────────────────────────────────────────────────
99
- # ARCHITECTURE 2 β€” SFT VisionT5Model
100
- # BUG FIX: Uses self.t5 and self.proj β€” exactly matching best_model.pt keys
101
- # from SFT notebook Cell 33. Do NOT rename these to txt_model/img_proj.
102
  # ─────────────────────────────────────────────────────────────────────────────
103
- class SFTVisionT5Model(nn.Module):
104
  def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
105
  super().__init__()
 
 
106
  self.img_encoder = img_encoder
107
- # ← self.t5 (NOT self.txt_model β€” must match saved keys)
 
108
  self.t5 = T5ForConditionalGeneration.from_pretrained(txt_model_name)
109
- # ← self.proj (NOT self.img_proj β€” must match saved keys)
 
110
  self.proj = nn.Linear(img_emb_dim, self.t5.config.d_model)
111
 
 
112
  for p in self.t5.shared.parameters():
113
  p.requires_grad = False
114
 
115
- def generate_reports(self, pixel_values, max_length=100):
116
- self.eval()
117
- with torch.no_grad():
118
- # Extract + project image features
119
- img_feats = self.img_encoder(pixel_values) # [B, feature_dim]
120
- img_feats = self.proj(img_feats) # [B, d_model]
121
- encoder_hidden_states = img_feats.unsqueeze(1) # [B, 1, d_model]
122
-
123
- # Encode
124
- encoder_outputs = self.t5.encoder(
125
- inputs_embeds=encoder_hidden_states
126
- )
127
 
128
- attn = torch.ones(
129
- encoder_hidden_states.size()[:2], device=pixel_values.device
130
- )
131
-
132
- # BUG FIX 3: repetition_penalty + no_repeat_ngram_size breaks
133
- # the "Projection: Projection: Projection:" loop
134
- generated_ids = self.t5.generate(
135
- encoder_outputs=encoder_outputs,
136
- attention_mask=attn,
137
- max_length=max_length,
138
- num_beams=4,
139
- early_stopping=True,
140
- no_repeat_ngram_size=3,
141
- repetition_penalty=1.3,
142
- )
143
 
144
- return generated_ids
145
-
146
-
147
- # ─────────────────────────────────────────────────────────────────────────────
148
- # ARCHITECTURE 3 β€” PPO VisionT5Model
149
- # Uses self.txt_model and self.img_proj β€” matching RM/PPO notebook Cell 4.
150
- # ─────────────────────────────────────────────────────────────────────────────
151
- class PPOVisionT5Model(nn.Module):
152
- def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
153
- super().__init__()
154
- self.img_encoder = img_encoder
155
- # ← self.txt_model (matches PPO notebook Cell 4)
156
- self.txt_model = T5ForConditionalGeneration.from_pretrained(txt_model_name)
157
- # ← self.img_proj (matches PPO notebook Cell 4)
158
- self.img_proj = nn.Linear(img_emb_dim, self.txt_model.config.d_model)
159
 
160
- def generate_reports(self, images, max_length=128):
161
- self.eval()
162
- with torch.no_grad():
163
- img_features = self.img_encoder(images) # [B, feature_dim]
164
- img_emb = self.img_proj(img_features).unsqueeze(1) # [B, 1, d_model]
165
 
166
- batch_size = images.size(0)
167
- img_attn = torch.ones(batch_size, 1, device=images.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- encoder_outputs = self.txt_model.encoder(
170
- inputs_embeds=img_emb,
171
- attention_mask=img_attn
172
- )
 
 
 
 
 
 
173
 
174
- # BUG FIX 3: same repetition guards as SFT
175
- generated = self.txt_model.generate(
176
- encoder_outputs=encoder_outputs,
177
- attention_mask=img_attn,
178
- max_length=max_length,
179
- num_beams=4,
180
- early_stopping=True,
181
- no_repeat_ngram_size=3,
182
- repetition_penalty=1.3,
183
- )
184
 
185
- return generated
186
 
 
187
 
188
  # ─────────────────────────────────────────────────────────────────────────────
189
- # ARCHITECTURE 4 β€” Reward Model
190
- # Matches RM/PPO notebook Cell 5 exactly.
191
  # ─────────────────────────────────────────────────────────────────────────────
192
- class RewardModel(nn.Module):
193
- def __init__(self, img_encoder, txt_model_name="t5-small"):
194
- super().__init__()
195
- self.img_encoder = img_encoder
196
- self.txt_encoder = T5ForConditionalGeneration.from_pretrained(txt_model_name).encoder
197
- img_dim = img_encoder.feature_dim
198
- txt_dim = self.txt_encoder.config.d_model
199
- self.img_proj = nn.Linear(img_dim, 512)
200
- self.txt_proj = nn.Linear(txt_dim, 512)
201
- self.reward_head = nn.Sequential(
202
- nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(0.1),
203
- nn.Linear(512, 256), nn.ReLU(), nn.Dropout(0.1),
204
- nn.Linear(256, 1)
 
205
  )
206
 
207
- def forward(self, images, input_ids, attention_mask):
208
- img_features = self.img_encoder(images)
209
- img_emb = self.img_proj(img_features)
210
- txt_outputs = self.txt_encoder(input_ids=input_ids, attention_mask=attention_mask)
211
- txt_emb = txt_outputs.last_hidden_state.mean(dim=1)
212
- txt_emb = self.txt_proj(txt_emb)
213
- combined = torch.cat([img_emb, txt_emb], dim=1)
214
- return self.reward_head(combined).squeeze(-1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
 
217
  # ─────────────────────────────────────────────────────────────────────────────
218
- # MODEL LOADER β€” handles both .pt (state_dict) and .pkl (full model)
219
- # Prints a key-match diagnostic so you can see exactly what loaded.
220
  # ─────────────────────────────────────────────────────────────────────────────
221
- def remap_keys(raw_sd: dict, label: str) -> dict:
 
 
 
 
222
  """
223
- Remap state_dict keys to match current model attribute names.
224
-
225
- Known mismatches discovered from diagnostic output:
226
- SFT notebook used:
227
- img_encoder.encoder.* β†’ we use img_encoder.backbone.*
228
- t5.* β†’ we use t5.* (already correct for SFTVisionT5Model)
229
- proj.* β†’ we use proj.* (already correct for SFTVisionT5Model)
230
- PPO/RM notebooks used:
231
- img_encoder.backbone.* β†’ already correct βœ…
232
- txt_model.* β†’ already correct βœ…
233
- img_proj.* β†’ already correct βœ…
234
  """
235
- remapped = {}
236
- changed = 0
237
- for k, v in raw_sd.items():
238
- new_k = k
239
- # SFT encoder used self.encoder, our CoAtNetEncoder uses self.backbone
240
- if "img_encoder.encoder." in new_k:
241
- new_k = new_k.replace("img_encoder.encoder.", "img_encoder.backbone.")
242
- changed += 1
243
- remapped[new_k] = v
244
- if changed:
245
- print(f" πŸ”§ Remapped {changed} keys: img_encoder.encoder.* β†’ img_encoder.backbone.*")
246
- return remapped
247
-
248
-
249
- def load_model(path: str, model_obj: nn.Module, label: str) -> nn.Module:
250
- print(f"\nπŸ“‚ Loading {label} from: {path}")
251
-
252
- if path.endswith(".pkl"):
253
- with open(path, "rb") as f:
254
- loaded = pickle.load(f)
255
- print(f" βœ… Loaded full pickle object: {type(loaded)}")
256
- return loaded.to(device)
257
-
258
- # .pt state_dict
259
- raw_sd = torch.load(path, map_location=device)
260
-
261
- # Print first 5 saved keys for diagnosis
262
- saved_keys = list(raw_sd.keys())
263
- print(f" Saved keys (first 5): {saved_keys[:5]}")
264
- model_keys = list(model_obj.state_dict().keys())
265
- print(f" Model keys (first 5): {model_keys[:5]}")
266
-
267
- # Remap any mismatched key prefixes
268
- raw_sd = remap_keys(raw_sd, label)
269
-
270
- result = model_obj.load_state_dict(raw_sd, strict=False)
271
-
272
- # Ignore known-safe missing keys:
273
- # head.fc.* - classification head, intentionally removed (num_classes=0)
274
- # num_batches_tracked - BatchNorm counter, not a learned weight
275
- SAFE_MISSING = ("num_batches_tracked", "head.fc.")
276
- missing = [k for k in result.missing_keys if not any(s in k for s in SAFE_MISSING)]
277
- unexpected = [k for k in result.unexpected_keys if "num_batches_tracked" not in k]
278
-
279
- if missing:
280
- print(f" Missing keys: {missing[:5]}{'...' if len(missing)>5 else ''}")
281
- print(f" WARNING: {len(missing)} missing keys - weights NOT loaded for those layers!")
282
- if unexpected:
283
- print(f" Unexpected keys: {unexpected[:5]}{'...' if len(unexpected)>5 else ''}")
284
- if not missing and not unexpected:
285
- print(f" OK: All keys matched perfectly!")
286
-
287
- return model_obj.to(device)
288
-
289
 
290
- # ─────────────────────────────────────────────────────────────────────────────
291
- # LOAD ALL THREE MODELS FROM HUGGING FACE HUB
292
- # Models are downloaded from Shree2604/BioStack repository
293
- # ─────────────────────────────────────────────────────────────────────────────
294
- def download_model_from_hf(model_filename: str, local_path: str = "models/") -> str:
295
- """Download model from Hugging Face Hub if not exists locally"""
296
- os.makedirs(local_path, exist_ok=True)
297
- full_path = os.path.join(local_path, model_filename)
298
-
299
- if not os.path.exists(full_path):
300
- print(f" Downloading {model_filename} from Hugging Face Hub...")
301
- try:
302
- downloaded_path = hf_hub_download(
303
- repo_id="Shree2604/BioStack",
304
- filename=model_filename,
305
- local_dir=local_path,
306
- local_dir_use_symlinks=False
307
  )
308
- print(f" Downloaded {model_filename}")
309
- return downloaded_path
310
- except Exception as e:
311
- print(f" Failed to download {model_filename}: {e}")
312
- raise
313
- else:
314
- print(f" Using local {model_filename}")
315
- return full_path
316
-
317
- print("\n" + "="*60)
318
- print(" LOADING MODELS FROM HUGGING FACE HUB")
319
- print("="*60)
320
-
321
- # Download models from Hugging Face
322
- SFT_MODEL_PATH = download_model_from_hf("best_model.pt")
323
- REWARD_MODEL_PATH = download_model_from_hf("reward_model.pt")
324
- PPO_MODEL_PATH = download_model_from_hf("rlhf_model.pt")
325
-
326
- # SFT
327
- _sft_enc = CoAtNetEncoder(pretrained=False)
328
- sft_model = load_model(SFT_MODEL_PATH, SFTVisionT5Model(_sft_enc), "SFT Model")
329
- sft_model.eval()
330
-
331
- # Reward
332
- _rm_enc = CoAtNetEncoder(pretrained=False)
333
- reward_model = load_model(REWARD_MODEL_PATH, RewardModel(_rm_enc), "Reward Model")
334
- reward_model.eval()
335
-
336
- # PPO
337
- _ppo_enc = CoAtNetEncoder(pretrained=False)
338
- ppo_model = load_model(PPO_MODEL_PATH, PPOVisionT5Model(_ppo_enc), "PPO Model")
339
- ppo_model.eval()
340
-
341
- print("\n All models loaded and ready!\n" + "="*60 + "\n")
342
 
 
 
343
 
344
- # ─────────────────────────────────────────────────────────────────────────────
345
- # IMAGE PREPROCESSING
346
- # Matches BOTH notebooks: RGB, 224Γ—224, ImageNet normalisation
347
- # ─────────────────────────────────────────────────────────────────────────────
348
- transform = transforms.Compose([
349
- transforms.Resize((224, 224)),
350
- transforms.ToTensor(),
351
- transforms.Normalize(mean=[0.485, 0.456, 0.406],
352
- std=[0.229, 0.224, 0.225])
353
- ])
354
 
355
- def preprocess(file_bytes: bytes) -> torch.Tensor:
356
- img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
357
- return transform(img).unsqueeze(0).to(device) # [1, 3, 224, 224]
358
 
359
 
360
  # ─────────────────────────────────────────────────────────────────────────────
361
- # REWARD FEEDBACK GENERATOR
362
  # ─────────────────────────────────────────────────────────────────────────────
363
- KEY_MEDICAL_TERMS = [
364
- 'lung', 'heart', 'normal', 'clear', 'opacity', 'infiltrate',
365
- 'cardiomegaly', 'pleural', 'pulmonary', 'chest', 'thorax',
366
- 'pneumonia', 'edema', 'effusion', 'consolidation'
367
- ]
368
-
369
- def reward_feedback(report: str, score: float) -> str:
370
- rl = report.lower()
371
- present = [t for t in KEY_MEDICAL_TERMS if t in rl]
372
- missing = [t for t in KEY_MEDICAL_TERMS if t not in rl]
373
- words = len(report.split())
374
- length_q = "good" if 50 <= words <= 150 else ("too short" if words < 50 else "too long")
375
-
376
- # Quality factor assessments based on the score and analysis
377
- terminology_score = len(present) / len(KEY_MEDICAL_TERMS)
378
- completeness_score = min(1.0, words / 100.0) # Rough estimate based on length
379
- structure_score = 1.0 if 50 <= words <= 150 else 0.5 # Good structure if proper length
380
- radiological_score = score # The overall score represents alignment
381
-
382
- return (
383
- f"Reward Score: {score:.2f} | "
384
- f"Quality Factors - "
385
- f"Medical Terminology: {terminology_score:.1%} | "
386
- f"Clinical Completeness: {completeness_score:.1%} | "
387
- f"Report Structure: {structure_score:.1%}"
388
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
  # ─────────────────────────────────────────────────────────────────────────────
392
  # FASTAPI APP
393
  # ─────────────────────────────────────────────────────────────────────────────
394
- app = FastAPI(title="RLHF Medical Demo")
395
 
396
  app.add_middleware(
397
  CORSMiddleware,
398
- allow_origins=["*"], # Allow all origins for Hugging Face Spaces
399
  allow_methods=["*"],
400
  allow_headers=["*"],
401
  )
402
 
403
 
 
 
 
 
 
 
404
  @app.get("/health")
405
  def health():
406
- return {"status": "ok", "device": str(device)}
 
 
 
 
 
407
 
408
 
409
  @app.post("/sft")
410
  async def sft_inference(file: UploadFile = File(...)):
 
 
 
411
  try:
412
- tensor = preprocess(await file.read())
413
- generated_ids = sft_model.generate_reports(tensor)
 
 
 
 
 
 
 
 
 
 
414
  report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
415
- # Strip any leading "Projection: X." prefix that leaked from training data
416
- if report.lower().startswith("projection:"):
417
- parts = report.split(".", 1)
418
- report = parts[1].strip() if len(parts) > 1 else report
419
  print(f"[SFT] Generated: {report}")
420
- return {"report": report[:81]}
421
- except Exception as e:
422
- traceback.print_exc()
423
- return {"report": f"ERROR: {str(e)}"}
424
-
425
-
426
- @app.post("/reward")
427
- async def reward_inference(file: UploadFile = File(...)):
428
- try:
429
- tensor = preprocess(await file.read())
430
-
431
- # First get the SFT report to score
432
- sft_generated_ids = sft_model.generate_reports(tensor)
433
- sft_report = tokenizer.decode(sft_generated_ids[0], skip_special_tokens=True).strip()
434
- # Strip any leading "Projection: X." prefix that leaked from training data
435
- if sft_report.lower().startswith("projection:"):
436
- parts = sft_report.split(".", 1)
437
- sft_report = parts[1].strip() if len(parts) > 1 else sft_report
438
- print(f"[REWARD] Scoring SFT report: {sft_report}")
439
-
440
- if not sft_report.strip():
441
- return {"score": 0.0, "feedback": "", "sft_report": ""}
442
-
443
- enc = tokenizer(
444
- [sft_report],
445
- max_length=128,
446
- padding="max_length",
447
- truncation=True,
448
- return_tensors="pt"
449
- )
450
- input_ids = enc.input_ids.to(device)
451
- attention_mask = enc.attention_mask.to(device)
452
-
453
- with torch.no_grad():
454
- raw_score = reward_model(tensor, input_ids, attention_mask).item()
455
-
456
- # Detailed debug logging
457
- print(f"[REWARD] Raw neural network output: {raw_score:.6f}")
458
- print(f"[REWARD] Clamping to [0,1] range: max(0.0, min(1.0, {raw_score:.6f})) = {max(0.0, min(1.0, raw_score)):.6f}")
459
-
460
- # Quality assessment details
461
- rl = sft_report.lower()
462
- present = [t for t in KEY_MEDICAL_TERMS if t in rl]
463
- missing = [t for t in KEY_MEDICAL_TERMS if t not in rl]
464
- words = len(sft_report.split())
465
- length_q = "good" if 50 <= words <= 150 else ("too short" if words < 50 else "too long")
466
-
467
- print(f"[REWARD] Report analysis:")
468
- print(f" - Total words: {words} ({length_q})")
469
- print(f" - Medical terms present ({len(present)}/{len(KEY_MEDICAL_TERMS)}): {present}")
470
- print(f" - Medical terms missing: {missing}")
471
- print(f" - Key terms list: {KEY_MEDICAL_TERMS}")
472
-
473
- # Reward model architecture details
474
- print(f"[REWARD] Model architecture:")
475
- print(f" - CoAtNet feature dim: {reward_model.img_encoder.feature_dim}")
476
- print(f" - T5 d_model: {reward_model.txt_encoder.config.d_model}")
477
- print(f" - Combined feature dim: 1024 (512 img + 512 text)")
478
- print(f" - Reward head: 1024β†’512β†’256β†’1")
479
-
480
- # Clamped score for display
481
- score = float(max(0.0, min(1.0, raw_score)))
482
- feedback = reward_feedback(sft_report, score)
483
- print(f"[REWARD] Final Score={score:.3f}")
484
- return {"score": score, "feedback": feedback, "sft_report": sft_report}
485
-
486
  except Exception as e:
487
  traceback.print_exc()
488
- return {"score": 0.0, "feedback": f"ERROR: {str(e)}", "sft_report": ""}
489
 
490
 
491
  @app.post("/ppo")
492
  async def ppo_inference(file: UploadFile = File(...)):
 
 
 
493
  try:
494
- tensor = preprocess(await file.read())
495
- generated_ids = ppo_model.generate_reports(tensor)
 
 
 
 
 
 
 
 
 
 
496
  report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
497
- # Strip any leading "Projection: X." prefix that leaked from training data
498
- if report.lower().startswith("projection:"):
499
- parts = report.split(".", 1)
500
- report = parts[1].strip() if len(parts) > 1 else report
501
  print(f"[PPO] Generated: {report}")
502
- return {"report": report}
 
 
 
503
  except Exception as e:
504
  traceback.print_exc()
505
- return {"report": f"ERROR: {str(e)}"}
506
 
507
 
508
- # ─────────────────────────────────────────────────────────────────────────────
509
- # DIAGNOSTIC ENDPOINT β€” call GET /debug_keys to verify key names in your files
510
- # e.g. curl http://localhost:8000/debug_keys
511
- # ─────────────────────────────────────────────────────────────────────────────
512
- @app.get("/debug_keys")
513
- def debug_keys():
514
- import os
515
- result = {}
516
- for label, path in [("SFT", SFT_MODEL_PATH), ("Reward", REWARD_MODEL_PATH), ("PPO", PPO_MODEL_PATH)]:
517
- if not os.path.exists(path):
518
- result[label] = f"FILE NOT FOUND: {path}"
519
- continue
520
- try:
521
- sd = torch.load(path, map_location="cpu")
522
- keys = list(sd.keys())
523
- result[label] = {"first_10_keys": keys[:10], "total_keys": len(keys)}
524
- except Exception as e:
525
- result[label] = f"ERROR: {e}"
526
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
 
529
  # ─────────────────────────────────────────────────────────────────────────────
530
- # STATIC FILE SERVING - Mount React build directory AFTER all API routes
531
  # ─────────────────────────────────────────────────────────────────────────────
532
  from fastapi.staticfiles import StaticFiles
533
- import os
534
 
535
- # Check if build directory exists, create fallback if needed
536
  if os.path.exists("build"):
537
  app.mount("/", StaticFiles(directory="build", html=True), name="static")
538
  print("βœ… React app mounted at /")
 
2
  import torch
3
  import torch.nn as nn
4
  import timm
 
5
  import traceback
6
  import os
7
  from PIL import Image
 
12
  from huggingface_hub import hf_hub_download
13
 
14
  # ─────────────────────────────────────────────────────────────────────────────
15
+ # CONFIGURATION - Matching Colab Notebook Exactly
16
  # ─────────────────────────────────────────────────────────────────────────────
17
  CONFIG = {
18
  'coatnet_model': 'coatnet_1_rw_224',
 
20
  'img_emb_dim': 768,
21
  'train_last_stages': 2,
22
  'image_size': 224,
23
+ 'max_length': 100,
24
+ 'num_beams': 4,
25
  }
26
 
27
  # ─────────────────────────────────────────────────────────────────────────────
 
31
  print(f"πŸ–₯️ Using device: {device}")
32
 
33
  # ─────────────────────────────────────────────────────────────────────────────
34
+ # LOAD TOKENIZER - Matching Colab
35
  # ─────────────────────────────────────────────────────────────────────────────
 
36
  print("\n" + "="*80)
37
+ print("LOADING TOKENIZER")
38
  print("="*80)
 
 
39
  tokenizer = T5Tokenizer.from_pretrained(CONFIG['t5_model'])
40
  print(f"βœ“ Loaded tokenizer: {CONFIG['t5_model']}")
41
 
42
+ # ─────────────────────────────────────────────────────────────────────────────
43
+ # IMAGE TRANSFORM - Matching Colab Exactly
44
+ # ─────────────────────────────────────────────────────────────────────────────
45
  transform = transforms.Compose([
46
  transforms.Resize((CONFIG['image_size'], CONFIG['image_size'])),
47
  transforms.ToTensor(),
 
52
  ])
53
  print(f"βœ“ Image transform defined (size: {CONFIG['image_size']}x{CONFIG['image_size']})")
54
 
 
 
 
 
 
55
  # ─────────────────────────────────────────────────────────────────────────────
56
+ # ARCHITECTURE 1: CoAtNetEncoder - Exactly from Colab SECTION 6
 
57
  # ─────────────────────────────────────────────────────────────────────────────
58
  class CoAtNetEncoder(nn.Module):
59
+ def __init__(self, model_name="coatnet_1_rw_224", pretrained=True, train_last_stages=2):
60
  super().__init__()
61
+ self.encoder = timm.create_model(
62
+ model_name,
63
+ pretrained=pretrained,
64
+ num_classes=0,
65
+ global_pool="avg"
66
+ )
 
 
 
 
 
 
 
67
 
68
+ # Freeze all parameters
69
+ for p in self.encoder.parameters():
70
+ p.requires_grad = False
 
 
 
 
71
 
72
+ # Unfreeze last stages
73
+ if hasattr(self.encoder, "stages") and train_last_stages is not None:
74
+ stages = self.encoder.stages
75
+ for stage in stages[-train_last_stages:]:
76
+ for p in stage.parameters():
77
+ p.requires_grad = True
78
 
79
  def forward(self, x):
80
+ return self.encoder(x)
 
 
 
81
 
82
 
83
  # ─────────────────────────────────────────────────────────────────────────────
84
+ # ARCHITECTURE 2: VisionT5Model - Exactly from Colab SECTION 6
 
 
85
  # ─────────────────────────────────────────────────────────────────────────────
86
+ class VisionT5Model(nn.Module):
87
  def __init__(self, img_encoder, txt_model_name="t5-small", img_emb_dim=768):
88
  super().__init__()
89
+
90
+ # Vision encoder (CoAtNet)
91
  self.img_encoder = img_encoder
92
+
93
+ # Text decoder (T5)
94
  self.t5 = T5ForConditionalGeneration.from_pretrained(txt_model_name)
95
+
96
+ # Projection layer to match image features with T5 d_model
97
  self.proj = nn.Linear(img_emb_dim, self.t5.config.d_model)
98
 
99
+ # Freeze shared T5 embeddings for faster and stable training
100
  for p in self.t5.shared.parameters():
101
  p.requires_grad = False
102
 
103
+ def forward(self, pixel_values, input_ids, attention_mask, labels=None):
104
+ # Extract image features
105
+ img_feats = self.img_encoder(pixel_values)
 
 
 
 
 
 
 
 
 
106
 
107
+ # Project image features to T5 embedding space
108
+ img_feats = self.proj(img_feats)
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ # Add sequence dimension
111
+ encoder_hidden_states = img_feats.unsqueeze(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
+ # Run T5 encoder using image embeddings
114
+ encoder_outputs = self.t5.encoder(
115
+ inputs_embeds=encoder_hidden_states
116
+ )
 
117
 
118
+ # Run T5 decoder and compute loss
119
+ outputs = self.t5(
120
+ encoder_outputs=encoder_outputs,
121
+ attention_mask=torch.ones(
122
+ encoder_hidden_states.size()[:2], device=device
123
+ ),
124
+ input_ids=input_ids,
125
+ labels=labels,
126
+ )
127
+ return outputs
128
+
129
+ def generate_reports(self, pixel_values, max_length=100, num_beams=4):
130
+ """
131
+ Generate reports - EXACTLY matching Colab SECTION 6
132
+ """
133
+ # Extract and project image features
134
+ img_feats = self.img_encoder(pixel_values)
135
+ img_feats = self.proj(img_feats)
136
+ encoder_hidden_states = img_feats.unsqueeze(1)
137
+
138
+ # Encode image features
139
+ encoder_outputs = self.t5.encoder(
140
+ inputs_embeds=encoder_hidden_states
141
+ )
142
 
143
+ # Generate report using beam search - EXACT parameters from Colab
144
+ generated_ids = self.t5.generate(
145
+ encoder_outputs=encoder_outputs,
146
+ attention_mask=torch.ones(
147
+ encoder_hidden_states.size()[:2], device=device
148
+ ),
149
+ max_length=max_length,
150
+ num_beams=num_beams,
151
+ early_stopping=True
152
+ )
153
 
154
+ return generated_ids
 
 
 
 
 
 
 
 
 
155
 
 
156
 
157
+ print("βœ“ Model architecture classes defined")
158
 
159
  # ─────────────────────────────────────────────────────────────────────────────
160
+ # MODEL LOADING FUNCTION - Exactly from Colab SECTION 8
 
161
  # ─────────────────────────────────────────────────────────────────────────────
162
+ def load_model_from_checkpoint(checkpoint_path: str, model_name: str, config: dict):
163
+ """
164
+ Load VisionT5Model from checkpoint - EXACT implementation from Colab
165
+ """
166
+ print(f"\nLoading {model_name} model...")
167
+ print(f" Checkpoint: {checkpoint_path}")
168
+
169
+ try:
170
+ # Create image encoder
171
+ print(f" Creating CoAtNet encoder: {config['coatnet_model']}")
172
+ img_encoder = CoAtNetEncoder(
173
+ model_name=config['coatnet_model'],
174
+ pretrained=False, # Weights will come from checkpoint
175
+ train_last_stages=config['train_last_stages']
176
  )
177
 
178
+ # Create full model
179
+ print(f" Creating VisionT5 model with T5: {config['t5_model']}")
180
+ model = VisionT5Model(
181
+ img_encoder=img_encoder,
182
+ txt_model_name=config['t5_model'],
183
+ img_emb_dim=config['img_emb_dim']
184
+ )
185
+
186
+ # Load checkpoint
187
+ print(f" Loading checkpoint weights...")
188
+ checkpoint = torch.load(checkpoint_path, map_location=device)
189
+
190
+ # Handle different checkpoint formats
191
+ if isinstance(checkpoint, dict):
192
+ if 'model_state_dict' in checkpoint:
193
+ state_dict = checkpoint['model_state_dict']
194
+ print(f" Found 'model_state_dict' in checkpoint")
195
+ elif 'state_dict' in checkpoint:
196
+ state_dict = checkpoint['state_dict']
197
+ print(f" Found 'state_dict' in checkpoint")
198
+ elif 'model' in checkpoint:
199
+ state_dict = checkpoint['model']
200
+ print(f" Found 'model' in checkpoint")
201
+ else:
202
+ # Assume checkpoint is the state dict
203
+ state_dict = checkpoint
204
+ print(f" Using checkpoint as state_dict directly")
205
+
206
+ # Print additional checkpoint info if available
207
+ if 'epoch' in checkpoint:
208
+ print(f" Checkpoint epoch: {checkpoint['epoch']}")
209
+ if 'loss' in checkpoint:
210
+ print(f" Checkpoint loss: {checkpoint['loss']:.4f}")
211
+ else:
212
+ state_dict = checkpoint
213
+ print(f" Checkpoint is a state_dict")
214
+
215
+ # Load state dict
216
+ missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
217
+
218
+ if missing_keys:
219
+ print(f" ⚠️ Missing keys: {len(missing_keys)}")
220
+ if len(missing_keys) <= 5:
221
+ for key in missing_keys:
222
+ print(f" - {key}")
223
+
224
+ if unexpected_keys:
225
+ print(f" ⚠️ Unexpected keys: {len(unexpected_keys)}")
226
+ if len(unexpected_keys) <= 5:
227
+ for key in unexpected_keys:
228
+ print(f" - {key}")
229
+
230
+ # Move to device and set to eval mode
231
+ model = model.to(device)
232
+ model.eval()
233
+
234
+ print(f"βœ“ {model_name} model loaded successfully!")
235
+ return model
236
+
237
+ except Exception as e:
238
+ print(f"❌ Error loading {model_name} model: {str(e)}")
239
+ import traceback
240
+ traceback.print_exc()
241
+ raise
242
 
243
 
244
  # ─────────────────────────────────────────────────────────────────────────────
245
+ # INFERENCE FUNCTION - Exactly from Colab SECTION 9
 
246
  # ─────────────────────────────────────────────────────────────────────────────
247
+ def generate_report(
248
+ image_path: str,
249
+ model: VisionT5Model,
250
+ config: dict
251
+ ) -> str:
252
  """
253
+ Generate medical report from X-ray image - EXACT implementation from Colab
 
 
 
 
 
 
 
 
 
 
254
  """
255
+ try:
256
+ # Preprocess image
257
+ image = Image.open(image_path).convert('RGB')
258
+ pixel_values = transform(image).unsqueeze(0).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
+ # Generate report - using EXACT parameters from Colab
261
+ with torch.no_grad():
262
+ generated_ids = model.generate_reports(
263
+ pixel_values,
264
+ max_length=config['max_length'],
265
+ num_beams=config['num_beams']
 
 
 
 
 
 
 
 
 
 
 
266
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ # Decode
269
+ report = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
270
 
271
+ return report.strip()
 
 
 
 
 
 
 
 
 
272
 
273
+ except Exception as e:
274
+ print(f"Error generating report for {image_path}: {str(e)}")
275
+ return ""
276
 
277
 
278
# ─────────────────────────────────────────────────────────────────────────────
# LOAD MODELS FROM HUGGINGFACE
# ─────────────────────────────────────────────────────────────────────────────
print("\n" + "="*80)
print("LOADING MODELS FROM HUGGINGFACE")
print("="*80)

# Download checkpoints from the Hub. On failure (offline run, local dev, quota)
# fall back to local paths; the SFT_MODEL_PATH / PPO_MODEL_PATH environment
# variables override the defaults, which keep the original Colab locations for
# backward compatibility.
try:
    SFT_MODEL_PATH = hf_hub_download(
        repo_id="vinaykumarhs2020/RLHF_radiology_model",
        filename="best_model.pt"
    )
    PPO_MODEL_PATH = hf_hub_download(
        repo_id="vinaykumarhs2020/RLHF_radiology_model",
        filename="rlhf_model.pt"
    )
    print(f"βœ“ Downloaded SFT model: {SFT_MODEL_PATH}")
    print(f"βœ“ Downloaded PPO model: {PPO_MODEL_PATH}")
except Exception as e:
    print(f"❌ Error downloading models: {e}")
    # Fallback: env-var override first, then the original Colab paths.
    SFT_MODEL_PATH = os.environ.get("SFT_MODEL_PATH", "/content/best_model.pt")
    PPO_MODEL_PATH = os.environ.get("PPO_MODEL_PATH", "/content/rlhf_model.pt")
    print(f"⚠️ Using local paths instead")

# Instantiate both checkpoints (weights loaded onto `device`, set to eval mode).
print("\n" + "="*80)
print("LOADING MODELS")
print("="*80)

sft_model = load_model_from_checkpoint(
    SFT_MODEL_PATH,
    "SFT",
    CONFIG
)

ppo_model = load_model_from_checkpoint(
    PPO_MODEL_PATH,
    "PPO",
    CONFIG
)

print("\nβœ“ Both models loaded successfully!")
322
 
323
# ─────────────────────────────────────────────────────────────────────────────
# FASTAPI APP
# ─────────────────────────────────────────────────────────────────────────────
app = FastAPI(title="Medical Report Generation - Matching Colab")

# Open CORS policy: the demo frontend may be served from any origin.
# NOTE(review): allow_origins=["*"] is fine for a public demo; tighten this
# list before exposing any authenticated functionality.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
334
 
335
 
336
def preprocess_bytes(file_bytes: bytes) -> torch.Tensor:
    """Decode raw upload bytes into a normalized (1, C, H, W) tensor on `device`."""
    pil_img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
    batch = transform(pil_img).unsqueeze(0)
    return batch.to(device)
340
+
341
+
342
  @app.get("/health")
343
  def health():
344
+ return {
345
+ "status": "ok",
346
+ "device": str(device),
347
+ "models_loaded": True,
348
+ "config": CONFIG
349
+ }
350
 
351
 
352
  @app.post("/sft")
353
  async def sft_inference(file: UploadFile = File(...)):
354
+ """
355
+ SFT model inference - EXACTLY matching Colab behavior
356
+ """
357
  try:
358
+ # Preprocess image
359
+ tensor = preprocess_bytes(await file.read())
360
+
361
+ # Generate report using EXACT Colab parameters
362
+ with torch.no_grad():
363
+ generated_ids = sft_model.generate_reports(
364
+ tensor,
365
+ max_length=CONFIG['max_length'],
366
+ num_beams=CONFIG['num_beams']
367
+ )
368
+
369
+ # Decode - EXACTLY as Colab does
370
  report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
371
+
 
 
 
372
  print(f"[SFT] Generated: {report}")
373
+
374
+ # Return FULL report without truncation
375
+ return {"report": report, "model": "SFT", "config_used": CONFIG}
376
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  except Exception as e:
378
  traceback.print_exc()
379
+ return {"report": f"ERROR: {str(e)}", "model": "SFT"}
380
 
381
 
382
  @app.post("/ppo")
383
  async def ppo_inference(file: UploadFile = File(...)):
384
+ """
385
+ PPO model inference - EXACTLY matching Colab behavior
386
+ """
387
  try:
388
+ # Preprocess image
389
+ tensor = preprocess_bytes(await file.read())
390
+
391
+ # Generate report using EXACT Colab parameters
392
+ with torch.no_grad():
393
+ generated_ids = ppo_model.generate_reports(
394
+ tensor,
395
+ max_length=CONFIG['max_length'],
396
+ num_beams=CONFIG['num_beams']
397
+ )
398
+
399
+ # Decode - EXACTLY as Colab does
400
  report = tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
401
+
 
 
 
402
  print(f"[PPO] Generated: {report}")
403
+
404
+ # Return FULL report without truncation
405
+ return {"report": report, "model": "PPO", "config_used": CONFIG}
406
+
407
  except Exception as e:
408
  traceback.print_exc()
409
+ return {"report": f"ERROR: {str(e)}", "model": "PPO"}
410
 
411
 
412
@app.post("/compare")
async def compare_models(file: UploadFile = File(...)):
    """Run both checkpoints on the same image and return the two reports side by side."""
    try:
        tensor = preprocess_bytes(await file.read())

        # Decode with each model under a single no-grad context; identical
        # parameters so the outputs differ only by checkpoint.
        reports = {}
        with torch.no_grad():
            for tag, mdl in (("sft", sft_model), ("ppo", ppo_model)):
                ids = mdl.generate_reports(
                    tensor,
                    max_length=CONFIG['max_length'],
                    num_beams=CONFIG['num_beams']
                )
                reports[tag] = tokenizer.decode(ids[0], skip_special_tokens=True).strip()

        print(f"[COMPARE] SFT: {reports['sft']}")
        print(f"[COMPARE] PPO: {reports['ppo']}")

        return {
            "sft_report": reports["sft"],
            "ppo_report": reports["ppo"],
            "config_used": CONFIG
        }

    except Exception as e:
        traceback.print_exc()
        # Mirror the single-model endpoints: embed the error in both fields.
        return {
            "sft_report": f"ERROR: {str(e)}",
            "ppo_report": f"ERROR: {str(e)}"
        }
454
+
455
+
456
@app.get("/debug_config")
def debug_config():
    """Expose the runtime configuration and model-load status for debugging."""
    info = {
        "config": CONFIG,
        "device": str(device),
        "tokenizer": CONFIG['t5_model'],
    }
    # Surface the individual decoding/preprocessing knobs at the top level too.
    for key in ("image_size", "max_length", "num_beams"):
        info[key] = CONFIG[key]
    info["models_loaded"] = {
        name: mdl is not None
        for name, mdl in (("sft", sft_model), ("ppo", ppo_model))
    }
    return info
471
 
472
 
473
# ─────────────────────────────────────────────────────────────────────────────
# STATIC FILE SERVING
# ─────────────────────────────────────────────────────────────────────────────
from fastapi.staticfiles import StaticFiles

# Serve the compiled React frontend (if a `build/` directory exists) at the
# root path. Mounted after the API routes so /health, /sft, /ppo, /compare and
# /debug_config keep taking precedence.
if os.path.exists("build"):
    app.mount("/", StaticFiles(directory="build", html=True), name="static")
    print("βœ… React app mounted at /")