Spaces:

m0ksh
/

PeptideAI

Sleeping

App Files Files Community

m0ksh committed on Mar 25

Commit

5eace46

verified ·

1 Parent(s): 8ec3495

Sync from GitHub (preserve manual model files)

Browse files

Files changed (9) hide show

StreamlitApp/StreamlitApp.py +3 -3
StreamlitApp/utils/analyze.py +1 -2
StreamlitApp/utils/optimize.py +0 -1
StreamlitApp/utils/predict.py +1 -2
StreamlitApp/utils/rate_limit.py +29 -0
StreamlitApp/utils/shared_ui.py +328 -0
StreamlitApp/utils/tsne.py +33 -0
StreamlitApp/utils/visualize.py +673 -32
requirements.txt +9 -9

StreamlitApp/StreamlitApp.py CHANGED Viewed

@@ -9,11 +9,11 @@ import plotly.express as px
 import html as _html
 from sklearn.manifold import TSNE
-# Page features are implemented in utils so this file stays orchestration-focused.
 from utils.predict import load_model, predict_amp, encode_sequence
 from utils.analyze import aa_composition, compute_properties
 from utils.optimize import optimize_sequence
-from utils.ui_helpers import (
     choose_top_candidate,
     format_conf_percent,
     mutation_heatmap_html,
@@ -24,7 +24,7 @@ from utils.ui_helpers import (
     build_analysis_insights,
     build_analysis_summary_text,
 )
-from utils.peptide_extras import (
     KNOWN_AMPS,
     MAX_3D_SEQUENCE_LENGTH,
     COMPACT_3D_LEGEND,

 import html as _html
 from sklearn.manifold import TSNE
+# Utils map to sidebar pages: predict / analyze / optimize / visualize / tsne, plus shared_ui.
 from utils.predict import load_model, predict_amp, encode_sequence
 from utils.analyze import aa_composition, compute_properties
 from utils.optimize import optimize_sequence
+from utils.shared_ui import (
     choose_top_candidate,
     format_conf_percent,
     mutation_heatmap_html,
     build_analysis_insights,
     build_analysis_summary_text,
 )
+from utils.visualize import (
     KNOWN_AMPS,
     MAX_3D_SEQUENCE_LENGTH,
     COMPACT_3D_LEGEND,

StreamlitApp/utils/analyze.py CHANGED Viewed

@@ -1,5 +1,4 @@
-# Sequence composition and physicochemical property helpers.
-# Mass and charge are textbook approximations for the UI, not for publishing numbers.
 from collections import Counter
 def aa_composition(sequence):

+# Analyze page: amino acid composition and simple physicochemical properties.
 from collections import Counter
 def aa_composition(sequence):

StreamlitApp/utils/optimize.py CHANGED Viewed

@@ -1,5 +1,4 @@
 # Heuristic mutation search used by the Optimize page.
-# It’s greedy and uses a few residue buckets — fun to play with, not a real design pipeline.
 import random
 from utils.predict import predict_amp

 # Heuristic mutation search used by the Optimize page.
 import random
 from utils.predict import predict_amp

StreamlitApp/utils/predict.py CHANGED Viewed

@@ -1,5 +1,4 @@
-# Model loading, sequence encoding, and AMP inference helpers.
-# Features are flattened one-hots (length × 20), not transformer embeddings — keeps the app small and CPU-friendly.
 import pathlib
 import numpy as np
 import torch

+# Predict page (and shared): load AMP model, one-hot encode, run predict_amp.
 import pathlib
 import numpy as np
 import torch

StreamlitApp/utils/rate_limit.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Optional rate limiter (not wired to a sidebar page yet).
+import time
+from collections import deque
class RateLimiter:
    """Sliding-window rate limiter for a single caller/key.

    At most ``max_calls`` calls are accepted within any window of
    ``period_seconds`` seconds. Timestamps come from ``time.monotonic()`` so
    that system clock adjustments cannot stretch or shrink the window.
    """

    def __init__(self, max_calls: int, period_seconds: float):
        self.max_calls = max_calls
        self.period = period_seconds
        # Monotonic timestamps of accepted calls, oldest first.
        self.calls = deque()

    def allow(self) -> bool:
        """Record a call and return True if it fits in the current window."""
        now = time.monotonic()
        # Drop timestamps outside the active window.
        cutoff = now - self.period
        while self.calls and self.calls[0] <= cutoff:
            self.calls.popleft()
        if len(self.calls) < self.max_calls:
            self.calls.append(now)
            return True
        return False

    def time_until_next(self) -> float:
        """Return the wait in seconds before another call would be allowed.

        Returns 0.0 when a call is allowed right now, and ``float("inf")``
        when ``max_calls`` is zero or negative, because no call can ever be
        accepted in that configuration.
        """
        if len(self.calls) < self.max_calls:
            return 0.0
        if not self.calls:
            # Only reachable when max_calls <= 0; indexing the empty deque
            # here used to raise IndexError.
            return float("inf")
        now = time.monotonic()
        oldest = self.calls[0]
        return max(0.0, (oldest + self.period) - now)

StreamlitApp/utils/shared_ui.py ADDED Viewed

	@@ -0,0 +1,328 @@

+# Shared UI helpers (formatting, tables, analysis bullets, exports) used on several sidebar pages.
+import html as _html
+from typing import Dict, List, Tuple, Optional
+from utils.analyze import compute_properties
def predicted_confidence(row: Dict) -> Optional[float]:
    """Return the confidence of the predicted class for one prediction row.

    The row's "Confidence" value is read as the AMP probability. It is
    returned unchanged when "Prediction" is "AMP" and as its complement
    otherwise. Returns None for an empty row or a missing / non-numeric
    confidence.
    """
    raw = row.get("Confidence") if row else None
    if raw is None:
        return None
    try:
        amp_probability = float(raw)
    except (TypeError, ValueError):
        return None
    predicted_amp = row.get("Prediction") == "AMP"
    return amp_probability if predicted_amp else 1.0 - amp_probability
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
    """Render a probability as a percentage string rounded to ``digits``."""
    percent = round(conf_prob * 100, digits)
    return "{}%".format(percent)
def heuristic_reason_for_profile(charge: float, hydro_fraction: float) -> str:
    """Pick a one-line rationale from net charge and hydrophobic fraction.

    Charge takes precedence: the hydrophobicity rule is only consulted when
    the charge is not above 2.
    """
    if charge > 2:
        reason = "High positive charge supports membrane disruption"
    elif 0.3 <= hydro_fraction <= 0.6:
        reason = "Balanced hydrophobicity"
    else:
        reason = "Favorable predicted profile"
    return reason
def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
    """Select the best prediction row and attach a short profile-based reason.

    Rows predicted as "AMP" are preferred; when there are none, every row is
    considered. Within the chosen pool the row with the highest
    predicted-class confidence wins (first one on ties). Returns None when
    there is nothing usable: no rows, no row with a valid confidence, or a
    winning row without a sequence.
    """
    if not predictions:
        return None

    amp_only = [row for row in predictions if row.get("Prediction") == "AMP"]
    pool = amp_only or predictions

    winner: Optional[Dict] = None
    winner_conf = -1.0
    for candidate in pool:
        conf = predicted_confidence(candidate)
        if conf is not None and conf > winner_conf:
            winner, winner_conf = candidate, conf

    if winner is None:
        return None
    sequence = winner.get("Sequence", "")
    if not sequence:
        return None

    properties = compute_properties(sequence)
    net_charge = properties.get("Net Charge (approx.)", 0)
    hydrophobic = properties.get("Hydrophobic Fraction", 0)
    return {
        "Sequence": sequence,
        "Prediction": winner.get("Prediction"),
        "predicted_confidence": winner_conf,
        "Reason": heuristic_reason_for_profile(net_charge, hydrophobic),
        "Charge": net_charge,
        "Hydrophobic Fraction": hydrophobic,
    }
def mutation_heatmap_html(original: str, final: str) -> str:
    """Render the final sequence as HTML with changed positions in red.

    Positions are compared index by index. Where the final sequence is
    shorter, the original residue is shown (and marked as changed). Residues
    are HTML-escaped, and the wrapper uses a monospace font so positions line
    up visually.
    """
    before = original or ""
    after = final or ""

    def _cell(idx: int) -> str:
        # Slicing yields "" past the end of either string.
        old = before[idx:idx + 1]
        new = after[idx:idx + 1]
        shown = new or old
        if not shown:
            return "&nbsp;"
        safe = _html.escape(shown)
        if old != new:
            return f"<span style='color:#d62728; font-weight:700;'>{safe}</span>"
        return safe

    opening = "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
    width = max(len(before), len(after))
    return opening + "".join(_cell(i) for i in range(width)) + "</div>"
def mutation_diff_table(original: str, final: str) -> List[Dict]:
    """Return one row per position comparing original and final residues.

    Positions are 1-based. A sequence that ends early contributes an empty
    string for the remaining positions, which counts as a change.
    """
    before = original or ""
    after = final or ""
    width = max(len(before), len(after))
    # Slicing yields "" once an index runs past the end of a sequence.
    pairs = [(before[i:i + 1], after[i:i + 1]) for i in range(width)]
    return [
        {
            "Position": position,
            "Original": old,
            "Final": new,
            "Changed": "No" if old == new else "Yes",
        }
        for position, (old, new) in enumerate(pairs, start=1)
    ]
+def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
+    if low <= value <= high:
+        return 0.0
+    if value < low:
+        return low - value
+    return value - high
def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_conf: float) -> Dict:
    """Summarize what changed between the original and optimized sequence.

    Reports the confidence delta in percentage points, the net-charge
    direction, and whether the hydrophobic fraction moved closer to or
    further from the 0.4-0.5 target window. Properties of an empty sequence
    are treated as 0.
    """
    before_seq = orig_seq or ""
    after_seq = final_seq or ""

    before_props = compute_properties(before_seq) if before_seq else {}
    after_props = compute_properties(after_seq) if after_seq else {}

    def _net_charge(props: Dict):
        # Accept either spelling of the charge key.
        return props.get("Net Charge (approx.)", props.get("Net charge", 0))

    charge_before = _net_charge(before_props)
    charge_after = _net_charge(after_props)
    hydro_before = before_props.get("Hydrophobic Fraction", 0)
    hydro_after = after_props.get("Hydrophobic Fraction", 0)

    confidence_gain = (float(final_conf) - float(orig_conf)) * 100.0

    if charge_after > charge_before:
        charge_verdict = "Increased"
    elif charge_after < charge_before:
        charge_verdict = "Decreased"
    else:
        charge_verdict = "Same"

    # Distance to the target hydrophobic window decides the verdict.
    window_low, window_high = 0.4, 0.5
    gap_before = _ideal_distance_to_interval(float(hydro_before), window_low, window_high)
    gap_after = _ideal_distance_to_interval(float(hydro_after), window_low, window_high)
    if gap_after < gap_before:
        hydro_verdict = "Improved balance"
    elif gap_after > gap_before:
        hydro_verdict = "Less optimal"
    else:
        hydro_verdict = "Same"

    return {
        "delta_conf_pct": confidence_gain,
        "charge_orig": charge_before,
        "charge_final": charge_after,
        "charge_change": charge_verdict,
        "hydro_orig": hydro_before,
        "hydro_final": hydro_after,
        "hydro_change": hydro_verdict,
    }
def sequence_length_warning(seq: str) -> Optional[str]:
    """Return a warning for sequences shorter than 8 or longer than 50 residues.

    Returns None for an empty sequence or a length within that range.
    """
    if not seq:
        return None
    length = len(seq)
    if length < 8:
        message = "Too short for typical AMP"
    elif length > 50:
        message = "Unusually long sequence"
    else:
        message = None
    return message
def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float) -> Tuple[str, str]:
    """Return a (label, hex color) pair describing sequence quality.

    A confidence of at least 0.9 counts as strong regardless of the other
    properties. Above 0.75 it is strong only with charge >= 2 and a
    hydrophobic fraction within 0.3-0.6.
    """
    very_confident = conf_prob >= 0.9
    if very_confident or (conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6):
        verdict = ("Strong AMP candidate", "#2ca02c")
    elif conf_prob > 0.5:
        verdict = ("Moderate potential", "#ff9800")
    else:
        verdict = ("Unlikely AMP", "#d62728")
    return verdict
# Plain-language bullets for the Analyze page: rules of thumb, not a second model.
def build_analysis_insights(
    label: str,
    conf: float,
    comp: Dict[str, float],
    length: int,
    hydro: float,
    charge: float,
) -> List[str]:
    """Build short, mechanism-oriented bullets explaining a prediction.

    ``conf`` is read as the AMP probability and converted to the confidence
    of the predicted class. ``comp`` maps one-letter residues to fractions.
    The first bullet is always the model headline; the rest are heuristic
    notes driven by charge, hydrophobic fraction, composition and length.
    Bullets are de-duplicated in order and capped at 14.
    """
    amp_probability = float(conf)
    is_amp = label == "AMP"
    if is_amp:
        pred_conf = round(amp_probability * 100, 1)
    else:
        pred_conf = round((1 - amp_probability) * 100, 1)

    # Headline: predicted class plus a confidence tier (>= 80, >= 60, lower).
    if is_amp:
        if pred_conf >= 80:
            headline = f"Model: **AMP** with high confidence ({pred_conf}% on this prediction)—profile below explains typical mechanisms."
        elif pred_conf >= 60:
            headline = f"Model: **AMP** with moderate confidence ({pred_conf}%); cross-check chemistry bullets before treating it as a strong hit."
        else:
            headline = f"Model: **AMP** but low confidence ({pred_conf}%); the mechanistic notes below matter more than the label alone."
    else:
        if pred_conf >= 80:
            headline = f"Model: **Non-AMP** with high confidence ({pred_conf}% on this prediction)—below are common reasons a sequence may not behave like a classic AMP."
        elif pred_conf >= 60:
            headline = f"Model: **Non-AMP** with moderate confidence ({pred_conf}%); reasons below are typical but not exhaustive."
        else:
            headline = f"Model: **Non-AMP** with low confidence ({pred_conf}%); treat the label as tentative and read the property-based notes."
    notes: List[str] = [headline]

    polar_frac = sum(float(comp.get(aa, 0.0)) for aa in "STNQYC")
    basic_frac = sum(float(comp.get(aa, 0.0)) for aa in "KRH")

    # Explain weaknesses for Non-AMP calls and for AMP calls below 65%.
    explain_weak = (label == "Non-AMP") or (is_amp and pred_conf < 65)
    if explain_weak:
        # The two length rules are mutually exclusive, so at most one fires.
        weakness_rules = [
            (
                charge <= 0,
                "Weak or absent **positive net charge**: many AMPs rely on cationic residues to bind **anionic bacterial surfaces** (e.g. LPS, teichoic acids); near-neutral or negative peptides often lack that first electrostatic hook.",
            ),
            (
                hydro < 0.28,
                "Low **hydrophobic** content: membrane insertion, pore formation, or lipid disruption is harder without a hydrophobic face or core to partition into the bilayer.",
            ),
            (
                hydro > 0.65,
                "Very high **hydrophobic** content: risk of aggregation or poor **aqueous solubility** before the peptide can reach bacteria—delivery and effective concentration suffer.",
            ),
            (
                polar_frac < 0.12,
                "Few **polar / H-bonding** residues (S, T, N, Q, Y, C): weaker interfacial interactions with lipids and water at the membrane—many AMP mechanisms benefit from polar positioning at the interface.",
            ),
            (
                basic_frac < 0.06 and charge < 2,
                "Sparse **basic** residues (K, R, H): a hallmark of many AMPs is concentrated positive charge for initial **bacterial association**; this sequence is thin on that axis.",
            ),
            (
                length < 8,
                "Very **short** length: may be too small to form a stable membrane-active structure or to span a bilayer meaningfully.",
            ),
            (
                length > 50,
                "Unusually **long** chain: folding, proteolysis, and synthesis cost can diverge from small cationic AMP archetypes.",
            ),
            (
                label == "Non-AMP" and charge >= 2 and 0.28 <= hydro <= 0.58,
                "**Note:** Charge and hydrophobic balance still look somewhat AMP-like—the model says Non-AMP, so treat this as a **disagreement** worth validating experimentally, not proof either way.",
            ),
        ]
        notes.extend(text for applies, text in weakness_rules if applies)

    # Supporting notes for reasonably confident AMP calls.
    if is_amp and pred_conf >= 65:
        if charge >= 2 and 0.28 <= hydro <= 0.58:
            notes.append(
                "**Positive charge** plus **moderate hydrophobic fraction** aligns with membrane-targeting motifs common in AMP literature."
            )
        if polar_frac >= 0.12:
            notes.append(
                "Adequate **polar** residues can help **interfacial** placement and H-bonding at the membrane."
            )

    # Composition notes that apply regardless of the predicted label.
    if (comp.get("K", 0) + comp.get("R", 0) + comp.get("H", 0)) >= 0.18:
        notes.append(
            "Higher **basic** residue fraction supports **electrostatic** attraction to anionic bacterial components."
        )
    if (comp.get("C", 0) + comp.get("W", 0)) >= 0.08:
        notes.append(
            "**Cysteine / tryptophan** can contribute to membrane insertion, stacking, or oxidative chemistry depending on context."
        )

    # dict.fromkeys keeps the first occurrence of each bullet, in order.
    return list(dict.fromkeys(notes))[:14]
def build_analysis_summary_text(
    sequence: str,
    prediction: str,
    confidence_display: str,
    props: Dict,
    analysis_lines: List[str],
) -> str:
    """Format a plain-text report of one analysis.

    Length, charge and hydrophobic fraction are read from ``props`` (with
    alternate key spellings as fallbacks). Each analysis line becomes a
    "- " bullet under the "Summary:" heading.
    """
    seq_length = props.get("Length", len(sequence))
    net_charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
    hydrophobic = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
    bullets = "\n".join(f"- {line}" for line in (analysis_lines or []))
    report_lines = [
        f"Sequence: {sequence}",
        f"Prediction: {prediction}",
        f"Confidence: {confidence_display}",
        f"Length: {seq_length}",
        f"Net Charge (approx.): {net_charge}",
        f"Hydrophobic Fraction: {hydrophobic}",
        "",
        "Summary:",
        bullets,
    ]
    return "\n".join(report_lines) + "\n"

StreamlitApp/utils/tsne.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# t-SNE page: optional helper embedding + scatter (StreamlitApp also runs t-SNE inline with Plotly).
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.manifold import TSNE
+import streamlit as st
+import torch
+import numpy as np
+from utils.predict import encode_sequence
def tsne_visualization(sequences, model):
    """Project first-layer model activations into 2D and render a scatter plot.

    Args:
        sequences: Sized collection of peptide sequences to embed.
        model: Model exposing ``model.layers``; the output of
            ``model.layers[0]`` is used as the embedding of each sequence.

    Returns:
        None. Results are rendered through Streamlit. With fewer than three
        sequences a warning is shown and nothing is computed.
    """
    # Validate the input size before any encoding. Previously every sequence
    # was embedded first and an empty input crashed inside np.vstack.
    perplexity = min(30, len(sequences) - 1)
    if perplexity < 2:
        # perplexity = n - 1 must be at least 2, so three sequences are needed.
        st.warning("Need at least 3 sequences for visualization.")
        return

    st.info("Generating embeddings... this may take a moment.")
    embeddings = []
    with torch.no_grad():
        for seq in sequences:
            x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
            # First layer output serves as a compact learned representation.
            emb = model.layers[0](x)
            embeddings.append(emb.numpy().flatten())
    embeddings = np.vstack(embeddings)

    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced = tsne.fit_transform(embeddings)
    df = pd.DataFrame(reduced, columns=["x", "y"])
    st.success("t-SNE visualization complete.")
    st.scatter_chart(df)

StreamlitApp/utils/visualize.py CHANGED Viewed

@@ -1,33 +1,674 @@
-# t-SNE helper — uses the first linear layer as a quick embedding; main app duplicates this with Plotly inline.
-import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.manifold import TSNE
-import streamlit as st
-import torch
 import numpy as np
-from utils.predict import encode_sequence
-def tsne_visualization(sequences, model):
-    # Project model embeddings into 2D and render a quick scatter plot.
-    st.info("Generating embeddings... this may take a moment.")
-    embeddings = []
-    for seq in sequences:
-        x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
-        with torch.no_grad():
-            # Use an early hidden layer as a compact learned representation.
-            emb = model.layers[0](x)
-        embeddings.append(emb.numpy().flatten())
-    embeddings = np.vstack(embeddings)
-    perplexity = min(30, len(sequences) - 1)
-    if perplexity < 2:
-        st.warning("Need at least 2 sequences for visualization.")
-        return
-    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
-    reduced = tsne.fit_transform(embeddings)
-    df = pd.DataFrame(reduced, columns=["x", "y"])
-    st.success("t-SNE visualization complete.")
-    st.scatter_chart(df)

+# Visualize page: 3D (py3Dmol / Plotly), helical wheel, known-AMP similarity, map HTML, shape blurbs.
+from __future__ import annotations
+import csv
+import math
+import pathlib
+from typing import Any, List, Optional, Tuple
 import numpy as np
+# Fallback if `Data/ampData.csv` is missing (e.g. local dev without Data/).
+_FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
+    "KWKLFKKIGAVLKVL",
+    "GIGKFLHSAKKFGKAFVGEIMNS",
+    "LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLV",
+    "KLFKKILKYL",
+    "FLPLLAGLAANFLPKIFCKITRKC",
+)
+def _amp_data_csv_path() -> pathlib.Path:
+    # StreamlitApp/utils/visualize.py -> repo root is parents[2]
+    return pathlib.Path(__file__).resolve().parents[2] / "Data" / "ampData.csv"
+def _load_known_amps_from_csv() -> List[str]:
+    # Load unique AMP-labeled sequences from CSV and normalize to uppercase.
+    path = _amp_data_csv_path()
+    if not path.exists():
+        return list(_FALLBACK_KNOWN_AMPS)
+    seen: set[str] = set()
+    amps: List[str] = []
+    try:
+        with path.open(newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            if not reader.fieldnames or "sequence" not in reader.fieldnames:
+                return list(_FALLBACK_KNOWN_AMPS)
+            for row in reader:
+                label = str(row.get("label", "")).strip()
+                if label != "1":
+                    continue
+                raw = (row.get("sequence") or "").strip()
+                if not raw:
+                    continue
+                seq = raw.upper()
+                if seq in seen:
+                    continue
+                seen.add(seq)
+                amps.append(seq)
+    except Exception:
+        return list(_FALLBACK_KNOWN_AMPS)
+    return amps if amps else list(_FALLBACK_KNOWN_AMPS)
+# Known AMP pool for similarity search (from ampData.csv label=1, or fallback list).
+KNOWN_AMPS: List[str] = _load_known_amps_from_csv()
+# py3Dmol viewer: skip very long sequences (labels + sticks scale with length).
+MAX_3D_SEQUENCE_LENGTH: int = 60
+STRUCTURE_3D_LEGEND_MARKDOWN: str = """
+**Color legend**
+- **Blue:** Positively charged residues (K, R, H)
+- **Red:** Negatively charged residues (D, E)
+- **Green:** Hydrophobic residues (A, V, I, L, M, F, W, Y)
+- **Gray:** Other / polar or unclassified residues
+"""
+STRUCTURE_3D_INTERPRETATION_MARKDOWN: str = """
+**Structural interpretation (approximation only)**
+This is a **simplified helical CA trace** used to visualize how residue chemistry is arranged in 3D space — **not** an experimentally determined fold.
+- **Clusters of green** often correspond to membrane-facing / hydrophobic patches.
+- **Blue regions** highlight cationic residues that can promote binding to anionic bacterial surfaces.
+- **Spatial separation** between hydrophobic and charged segments can suggest **amphipathic** character, common among many AMPs.
+Together, these cues help discuss whether a sequence has motifs frequently associated with antimicrobial peptides — **wet-lab validation is still required**.
+"""
+# One-letter -> three-letter (for minimal PDB lines for py3Dmol).
+_ONE_TO_THREE = {
+    "A": "ALA",
+    "R": "ARG",
+    "N": "ASN",
+    "D": "ASP",
+    "C": "CYS",
+    "Q": "GLN",
+    "E": "GLU",
+    "G": "GLY",
+    "H": "HIS",
+    "I": "ILE",
+    "L": "LEU",
+    "K": "LYS",
+    "M": "MET",
+    "F": "PHE",
+    "P": "PRO",
+    "S": "SER",
+    "T": "THR",
+    "W": "TRP",
+    "Y": "TYR",
+    "V": "VAL",
+}
def sequence_similarity(seq1: str, seq2: str) -> float:
    """Return the fraction of aligned positions where both sequences agree.

    Positions are compared index by index without gaps, and the match count
    is divided by the longer length. Returns 0.0 if either input is empty.
    """
    if not (seq1 and seq2):
        return 0.0
    longer = max(len(seq1), len(seq2))
    identical = 0
    for left, right in zip(seq1, seq2):
        if left == right:
            identical += 1
    return identical / longer
def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
    """Return the closest known AMP and its position-match similarity score.

    The query is uppercased and stripped of whitespace before comparison
    against every entry in ``KNOWN_AMPS``. On ties the earliest entry wins.
    Returns ``(None, 0.0)`` when the query or the pool is empty.
    """
    if not sequence or not KNOWN_AMPS:
        return None, 0.0
    query = "".join(ch for ch in sequence.upper() if not ch.isspace())
    if not query:
        return None, 0.0

    closest: Optional[str] = None
    top_score = -1.0  # similarity is never negative, so the first entry always registers
    for candidate in KNOWN_AMPS:
        candidate_score = sequence_similarity(query, candidate)
        if candidate_score > top_score:
            closest, top_score = candidate, candidate_score
    return closest, top_score
def get_residue_color(aa: str) -> str:
    """Map a one-letter residue code to its color category name.

    Returns "blue" for K/R/H, "red" for D/E, "green" for A/V/I/L/M/F/W/Y and
    "gray" for anything else, including empty or missing input. Matching is
    case-insensitive.
    """
    code = aa.upper() if aa else ""
    categories = (
        ("blue", ("K", "R", "H")),
        ("red", ("D", "E")),
        ("green", ("A", "V", "I", "L", "M", "F", "W", "Y")),
    )
    for color, members in categories:
        if code in members:
            return color
    return "gray"
def residue_color_mpl(aa: str) -> str:
    """Return the hex color used in Matplotlib plots for a residue.

    The category comes from ``get_residue_color``; unknown categories fall
    back to the gray hex value.
    """
    hex_by_category = {
        "blue": "#1D4ED8",
        "red": "#DC2626",
        "green": "#16A34A",
        "gray": "#57534E",
    }
    category = get_residue_color(aa)
    if category in hex_by_category:
        return hex_by_category[category]
    return "#57534E"
+HELIX_WHEEL_LEGEND_MARKDOWN: str = """
+**Helical wheel readout**
+- **Blue wedge:** cationic (K, R, H) — often important for initial membrane association.
+- **Red wedge:** anionic (D, E).
+- **Green wedge:** hydrophobic — often grouped on one face in amphipathic helices (membrane-facing).
+- **Gray:** polar / other — may participate in solubility or hydrogen bonding.
+Residues are placed using a **100° step** per position (common α-helical wheel convention). This is a **2D projection**, not a solved 3D structure.
+"""
+# Short blurbs for compact UI expanders (Visualize Peptide page)
+COMPACT_3D_LEGEND: str = """
+**How to read this 3D view**
+- **Plotly:** thick gray **backbone line** + colored residue markers (interactive rotation).
+- **3Dmol:** gray **cylinder backbone** between Cα positions + colored spheres (same chemistry colors).
+- **Blue:** positively charged residues (K, R, H)
+- **Red:** negatively charged residues (D, E)
+- **Green:** hydrophobic residues (A, V, I, L, M, F, W, Y)
+- **Gray:** other / polar residues
+- Geometry is a **helix-like approximation**, not an experimental structure.
+"""
+COMPACT_WHEEL_LEGEND: str = """
+**How to read this helical wheel**
+- **Radial spokes:** residue positions around the helix (100 degrees per residue)
+- **Black connectors:** sequence order (`i -> i+1`) across the wheel
+- **Colored circles:** residue chemistry classes
+- Color mapping matches the 3D view (**blue / red / green / gray**)
+"""
+COMPACT_MAP_LEGEND: str = """
+**How to read this sequence map**
+- Uses the same residue color mapping as 3D and helical wheel
+- Highlights where charged vs hydrophobic residues cluster along the sequence
+- Useful for quick amphipathic pattern checks
+"""
def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
    """Draw a helical wheel on polar axes and return the Matplotlib figure.

    Residues are placed 100 degrees apart on a single ring, clockwise from
    the top. The figure shows faint radial spokes, black connectors between
    consecutive residues, and color-coded residue markers with their
    one-letter labels. An empty sequence yields a titled, otherwise empty
    figure.
    """
    import matplotlib.pyplot as plt
    from matplotlib import patheffects as pe

    # Whitespace-free, uppercase residues.
    residues = "".join(filter(lambda ch: not ch.isspace(), (sequence or "").upper()))
    count = len(residues)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
    fig.patch.set_facecolor("white")
    ax.set_facecolor("#ffffff")
    if count == 0:
        ax.set_title("Helical wheel (empty sequence)", pad=12)
        return fig

    theta = np.deg2rad((np.arange(count, dtype=float) * 100.0) % 360.0)
    inner_radius, ring_radius = 0.06, 0.88
    # Shrink labels and markers as the sequence grows, within fixed bounds.
    label_size = max(7, min(11, int(220 / max(count, 1))))
    marker_area = float(np.clip(8000.0 / max(count, 1), 130.0, 420.0))

    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Radial spokes, one per residue position.
    for angle in theta:
        ax.plot(
            [angle, angle],
            [inner_radius, ring_radius],
            color="#1a1a1a",
            linewidth=0.65,
            alpha=0.45,
            zorder=1,
        )

    # Connectors joining residue i to residue i + 1 in sequence order.
    for start, stop in zip(theta[:-1], theta[1:]):
        ax.plot(
            [start, stop],
            [ring_radius, ring_radius],
            color="#0a0a0a",
            linewidth=1.05,
            solid_capstyle="round",
            zorder=2,
        )

    # Residue markers sit above spokes and connectors (higher zorder).
    ax.scatter(
        theta,
        np.full(count, ring_radius),
        s=marker_area,
        c=[residue_color_mpl(aa) for aa in residues],
        edgecolors="#111111",
        linewidths=1.2,
        zorder=4,
    )

    # White outline keeps the letters legible on dark marker fills.
    outline = [pe.withStroke(linewidth=2.2, foreground="white")]
    for angle, aa in zip(theta, residues):
        label = ax.text(
            angle,
            ring_radius,
            aa,
            ha="center",
            va="center",
            fontsize=label_size,
            color="#0a0a0a",
            fontweight="bold",
            zorder=5,
        )
        label.set_path_effects(outline)

    ax.set_ylim(0, 1.0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.grid(False)
    ax.set_title(
        "Helical wheel (α-helix, 100°/residue) — spokes + sequence connectors",
        pad=14,
        fontsize=11,
        color="#111111",
    )
    return fig
def get_residue_style(aa: str) -> str:
    """Return the inline CSS used to highlight one residue in the sequence map.

    Background colors follow the residue categories used by the other views:
    K/R/H blue, D/E red, A/V/I/L/M/F/W/Y green, everything else gray.
    Matching is case-insensitive so lowercase input gets the same color as
    uppercase; empty or missing input is styled gray.
    """
    code = aa.upper() if aa else ""
    if code in ("K", "R", "H"):
        background = "#1D4ED8"
    elif code in ("D", "E"):
        background = "#DC2626"
    elif code in ("A", "V", "I", "L", "M", "F", "W", "Y"):
        background = "#16A34A"
    else:
        background = "#57534E"
    return f"background-color: {background}; color: #ffffff; padding: 2px 3px; border-radius: 2px;"
def build_importance_map_html(sequence: str) -> str:
    """Build HTML spans that color each residue by its chemistry class.

    Whitespace is skipped and residues are uppercased and HTML-escaped before
    being wrapped in a styled ``<span>``. Empty or missing input returns an
    empty string.
    """
    import html as html_mod

    # Treat None like an empty sequence, matching the other sequence helpers.
    residues = (ch.upper() for ch in (sequence or "") if not ch.isspace())
    return "".join(
        f'<span style="{get_residue_style(aa)}">{html_mod.escape(aa)}</span>'
        for aa in residues
    )
def helix_coordinates(sequence: str, *, smooth: bool = False) -> np.ndarray:
    """Return an (n, 3) array of helix-like CA coordinates for a sequence.

    Each residue advances 100 degrees around the axis and 1.45 units along z,
    with a small periodic variation in radius. Whitespace in ``sequence`` is
    ignored; empty or missing input gives a (0, 3) array.

    Args:
        sequence: Peptide sequence (one-letter codes).
        smooth: When True and there are at least three residues, apply two
            passes of a [0.2, 0.6, 0.2] smoothing kernel with both end
            points held fixed.
    """
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    if n == 0:
        return np.zeros((0, 3), dtype=float)

    theta_step = 100.0 * math.pi / 180.0  # angular step per residue on the wheel
    rise = 1.45
    points: List[Tuple[float, float, float]] = []
    for i in range(n):
        angle = i * theta_step
        r = 5.0 + 0.12 * math.sin(i * 0.4)
        points.append((math.cos(angle) * r, math.sin(angle) * r, i * rise))
    coords = np.array(points, dtype=float)

    if smooth and n >= 3:
        kernel = np.array([0.2, 0.6, 0.2])
        smoothed = coords.copy()
        for _ in range(2):
            smoothed = np.column_stack(
                [np.convolve(smoothed[:, axis], kernel, mode="same") for axis in range(3)]
            )
            # mode="same" zero-pads the ends, damping the end points. Pin them
            # after every pass so the damped values never feed into the next
            # pass and kink the neighbouring residues.
            smoothed[0] = coords[0]
            smoothed[-1] = coords[-1]
        coords = smoothed
    return coords
def generate_helix_pdb(sequence: str, smooth: bool = False) -> str:
    """Return a minimal CA-only PDB string for a helix-like trace.

    One fixed-column ``ATOM`` record is emitted per residue on chain A, with
    serial and residue numbers starting at 1. Residues missing from the
    one-to-three-letter table are written as ``UNK``. Empty, whitespace-only
    or missing input returns an empty string.

    Args:
        sequence: Peptide sequence (one-letter codes); whitespace is ignored.
        smooth: Passed through to ``helix_coordinates``.
    """
    # Accept None like helix_coordinates does instead of failing on .upper().
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean:
        return ""

    coords = helix_coordinates(clean, smooth=smooth)
    pdb_lines: List[str] = []
    # With one CA atom per residue, atom serial and residue number coincide.
    for number, (aa, point) in enumerate(zip(clean, coords), start=1):
        res_name = _ONE_TO_THREE.get(aa, "UNK")
        x, y, z = float(point[0]), float(point[1]), float(point[2])
        pdb_lines.append(
            f"ATOM  {number:5d}  CA  {res_name:3s} A{number:4d}    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00           C"
        )
    return "\n".join(pdb_lines)
def residue_shape_label(aa: str) -> str:
    """Return a short chemistry label for a residue.

    Used for hover text and the shape summary. The label is chosen from the
    color category reported by ``get_residue_color``; any category other than
    blue, red or green is labelled "polar / other".
    """
    fallback = "polar / other"
    label_by_color = dict(
        blue="cationic",
        red="anionic",
        green="hydrophobic",
        gray=fallback,
    )
    return label_by_color.get(get_residue_color(aa), fallback)
def _helical_wheel_resultant(indices: List[int]) -> float:
    """Return the mean resultant length of residue positions on the wheel.

    Each index ``i`` is mapped to the angle ``(i * 100) mod 360`` degrees. The
    result lies in [0, 1]; high values mean the residues cluster on one face
    of the wheel. Fewer than two indices yields 0.0.
    """
    count = len(indices)
    if count < 2:
        return 0.0
    cos_total = 0.0
    sin_total = 0.0
    for idx in indices:
        phi = math.radians((idx * 100.0) % 360.0)
        cos_total += math.cos(phi)
        sin_total += math.sin(phi)
    return float(math.hypot(cos_total / count, sin_total / count))
def build_shape_visual_summary(
    sequence: str,
    *,
    amp_label: Optional[str] = None,
    amp_prob: Optional[float] = None,
) -> List[str]:
    """Build short Markdown bullets relating the helix/wheel layout to chemistry.

    The bullets are heuristics derived from the color category that
    ``get_residue_color`` assigns to each residue and from how those residues
    spread around the 100-degree helical wheel.

    Args:
        sequence: Peptide sequence; whitespace is dropped, letters upper-cased,
            ``None`` treated as empty.
        amp_label: Optional model call. ``"AMP"`` and ``"Non-AMP"`` get
            dedicated wording above 65% confidence; anything else gets the
            generic bullet.
        amp_prob: Optional AMP probability in [0, 1]. The confidence shown is
            ``amp_prob`` for an ``"AMP"`` label and ``1 - amp_prob`` otherwise.
            A model bullet is added only when both optional values are given.

    Returns:
        Up to 12 unique bullets in generation order; ``[]`` for empty input.
    """
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    lines: List[str] = []
    if n == 0:
        return lines
    lines.append(
        "This view places residues on a **helix-like CA trace** (same geometry as the wheel). "
        "It shows **how** charged, hydrophobic, and polar positions sit in space—not a solved NMR/crystal structure."
    )

    # Classify every residue once and bucket its position by color category.
    positions = {"blue": [], "green": [], "gray": []}
    for i, aa in enumerate(clean):
        bucket = positions.get(get_residue_color(aa))
        if bucket is not None:
            bucket.append(i)
    pos_i, hyd_i, pol_i = positions["blue"], positions["green"], positions["gray"]

    f_h = len(hyd_i) / n
    f_p = len(pol_i) / n
    f_pos = len(pos_i) / n
    R_h = _helical_wheel_resultant(hyd_i)
    R_k = _helical_wheel_resultant(pos_i)

    if f_h >= 0.18 and f_p >= 0.12:
        lines.append(
            "You can point to **both** a **hydrophobic** (green) and **polar / other** (gray) presence along the trace—"
            "a common ingredient for **interface** behavior (aqueous vs lipid-facing), which many AMP mechanisms exploit."
        )
    elif f_h >= 0.25 and f_p < 0.1 and f_pos < 0.1:
        # The bullet asserts that cationic balance is missing, so it is only
        # emitted when the cationic fraction is low as well (same cut-off as
        # the polar fraction); otherwise it would mislabel lysine/arginine-rich
        # peptides.
        lines.append(
            "The trace is **dominated by hydrophobic** (green) positions; without much polar (gray) or cationic (blue) balance, "
            "membrane engagement can be less like classic cationic AMP helices (still sequence-context dependent)."
        )
    elif f_p >= 0.35 and f_h < 0.15:
        lines.append(
            "The trace is **rich in polar / other** (gray) and light on hydrophobic (green) packing—often more soluble, "
            "but less like a compact amphipathic helix unless charge or hydrophobic content appears elsewhere."
        )

    if len(hyd_i) >= 3 and R_h >= 0.52:
        lines.append(
            "**Hydrophobic residues cluster on one side** of the helical wheel (tight arc)—consistent with an **amphipathic** "
            "helix face that could sit at the **membrane interface**."
        )
    elif len(hyd_i) >= 2 and R_h < 0.35:
        lines.append(
            "**Hydrophobic** (green) positions are **spread** around the wheel—less of a single membrane-facing stripe; "
            "some AMPs still look like this, but classic amphipathic faces are easier to see when green groups on one arc."
        )

    if len(pos_i) >= 2 and R_k >= 0.5:
        lines.append(
            "**Cationic** (blue) residues group in angular space—helpful for a **localized positive patch** toward anionic lipids, "
            "a pattern often discussed for membrane-targeting peptides."
        )

    if amp_label is not None and amp_prob is not None:
        p = float(amp_prob)
        # Confidence is reported for the class the model actually called.
        pred_conf = round(p * 100, 1) if amp_label == "AMP" else round((1.0 - p) * 100, 1)
        if amp_label == "AMP" and pred_conf >= 65:
            lines.append(
                f"**Model:** AMP at **{pred_conf}%** confidence on this sequence—combined with the spatial pattern above, "
                "use the plot to argue **where** positive charge and hydrophobic bulk sit relative to each other."
            )
        elif amp_label == "Non-AMP" and pred_conf >= 65:
            lines.append(
                f"**Model:** Non-AMP at **{pred_conf}%** confidence—if the trace still **looks** amphipathic, treat that as "
                "**chemistry vs. classifier** tension worth testing in the lab, not proof of activity."
            )
        else:
            lines.append(
                f"**Model:** **{amp_label}** (about **{pred_conf}%** on that call)—read the **shape** bullets as physical intuition; "
                "they do not override the model or experiments."
            )

    # De-duplicate (order-preserving), cap length.
    return list(dict.fromkeys(lines))[:12]
def render_3d_plotly(
    sequence: str,
    *,
    height: int = 460,
) -> bool:
    """Draw the helix-like CA trace as an interactive Plotly 3D chart in Streamlit.

    A thick grey line marks the backbone and colored markers mark residues, so
    the trace never degrades to "dots only". Residue letters are drawn on the
    markers for sequences of up to 36 residues.

    Args:
        sequence: Peptide sequence; whitespace is dropped, letters upper-cased.
        height: Figure height passed to the Plotly layout.

    Returns:
        True once the chart has been handed to Streamlit. False if plotly or
        streamlit cannot be imported, or the cleaned sequence is empty or
        longer than ``MAX_3D_SEQUENCE_LENGTH``.
    """
    try:
        import plotly.graph_objects as go
        import streamlit as st
    except Exception:
        return False

    residues = "".join(ch for ch in (sequence or "").upper() if not ch.isspace())
    n_res = len(residues)
    if n_res == 0 or n_res > MAX_3D_SEQUENCE_LENGTH:
        return False
    trace_xyz = helix_coordinates(residues, smooth=True)
    if trace_xyz.shape[0] == 0:
        return False
    xs, ys, zs = trace_xyz[:, 0], trace_xyz[:, 1], trace_xyz[:, 2]

    marker_colors = [residue_color_mpl(aa) for aa in residues]
    hover_rows = [
        f"{pos} {aa} · {residue_shape_label(aa)}"
        for pos, aa in enumerate(residues, start=1)
    ]
    # Shrink markers and letters as the peptide gets longer.
    marker_size = float(min(max(900.0 / n_res, 3.5), 11.0))
    letter_size = max(9, min(12, int(220 / n_res)))
    with_letters = n_res <= 36
    letter_anchor = "top center" if n_res <= 24 else "middle center"

    backbone = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode="lines",
        line={"color": "rgba(110,110,118,0.92)", "width": 12},
        hoverinfo="skip",
        showlegend=False,
    )
    residue_markers = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode="markers+text" if with_letters else "markers",
        marker={
            "size": marker_size,
            "color": marker_colors,
            "line": {"color": "#1a1a1a", "width": 0.8},
        },
        text=list(residues) if with_letters else None,
        textposition=letter_anchor,
        textfont={"size": letter_size, "color": "#111111"},
        customdata=hover_rows,
        hovertemplate="%{customdata}<extra></extra>",
        name="Residues",
    )

    fig = go.Figure()
    fig.add_trace(backbone)
    fig.add_trace(residue_markers)
    fig.update_layout(
        height=height,
        margin={"l": 0, "r": 0, "t": 36, "b": 0},
        paper_bgcolor="#fafafa",
        title={
            "text": "Helix-like CA trace (approximation) · drag to rotate",
            "font": {"size": 13, "color": "#333333"},
            "x": 0.5,
            "xanchor": "center",
        },
        scene={
            "aspectmode": "data",
            "bgcolor": "#f3f4f6",
            "xaxis": {"visible": False},
            "yaxis": {"visible": False},
            "zaxis": {"visible": False},
        },
        showlegend=False,
    )
    st.plotly_chart(fig, use_container_width=True)
    return True
def render_3d_structure(
    sequence: str,
    width: int = 500,
    height: int = 400,
    iframe_height: int = 420,
    *,
    enhanced: bool = False,
    spin: bool = False,
) -> bool:
    """Render the CA-only helix trace with py3Dmol inside a Streamlit HTML frame.

    Residues are drawn as spheres colored by ``get_residue_color``, consecutive
    residues are joined by grey cylinders, and a subset of residues gets a
    one-letter label.

    Args:
        sequence: Peptide sequence; whitespace is dropped, letters upper-cased.
        width: Viewer width passed to ``py3Dmol.view``.
        height: Viewer height passed to ``py3Dmol.view``.
        iframe_height: Height of the Streamlit HTML component.
        enhanced: Use the smoothed trace, a darker background, slightly larger
            cylinders/spheres/font and up to 60 labels instead of 40.
        spin: Ask the viewer to spin.

    Returns:
        True once the HTML has been handed to Streamlit. False if the cleaned
        sequence is empty or longer than ``MAX_3D_SEQUENCE_LENGTH``, if py3Dmol
        cannot be imported, or if any rendering step raises.
    """
    import streamlit.components.v1 as components

    # Input sanitization keeps renderer stable across pasted FASTA/text snippets.
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean or len(clean) > MAX_3D_SEQUENCE_LENGTH:
        return False
    try:
        import py3Dmol  # type: ignore
    except Exception:
        return False

    try:
        coords = helix_coordinates(clean, smooth=enhanced)
        pdb_data = generate_helix_pdb(clean, smooth=enhanced)
        # One {x, y, z} dict per residue, shared by the cylinders and labels.
        points = [
            {"x": float(p[0]), "y": float(p[1]), "z": float(p[2])} for p in coords
        ]

        view = py3Dmol.view(width=width, height=height)
        view.addModel(pdb_data, "pdb")
        try:
            view.setBackgroundColor("#0f0f12" if enhanced else "#1e1e1e")
        except Exception:
            pass  # best effort: keep the viewer's default background

        # The CA-only PDB carries no bonds, so draw the backbone explicitly.
        cyl_r = 0.34 if enhanced else 0.28
        for start, end in zip(points, points[1:]):
            plain = {"start": start, "end": end, "radius": cyl_r, "color": "#7a7a82"}
            try:
                view.addCylinder({**plain, "fromCap": 1, "toCap": 1})
            except Exception:
                # Retry without cap options before giving up on this segment.
                try:
                    view.addCylinder(plain)
                except Exception:
                    pass

        sphere_radius = 0.36 if enhanced else 0.32
        for i, aa in enumerate(clean):
            view.setStyle(
                {"resi": i + 1},
                {"sphere": {"radius": sphere_radius, "color": get_residue_color(aa)}},
            )

        # Thin labels out evenly so at most max_labels are drawn.
        max_labels = 60 if enhanced else 40
        label_every = max(1, (len(clean) + max_labels - 1) // max_labels)
        fs = 10 if enhanced else 9
        for i in range(0, len(clean), label_every):
            try:
                view.addLabel(
                    clean[i],
                    {
                        # LabelSpec "position" is an x/y/z coordinate, not an
                        # atom selection, so anchor at the CA coordinates.
                        "position": points[i],
                        "backgroundColor": "#1a1a1a",
                        "fontColor": "#ffffff",
                        "fontSize": fs,
                    },
                )
            except Exception:
                pass

        view.zoomTo()
        if spin:
            try:
                view.spin(True)
            except Exception:
                try:
                    sp = getattr(view, "spin", None)
                    if callable(sp):
                        sp()
                except Exception:
                    pass

        if hasattr(view, "_make_html"):
            html = view._make_html()
        else:
            # NOTE(review): confirm that view.write() returns an HTML string on
            # py3Dmol builds lacking _make_html; if it does not, the outer
            # handler below turns the failure into a False return.
            html = view.write()
        components.html(html, height=iframe_height)
        return True
    except Exception:
        # Deliberate best effort: callers use the False return to fall back.
        return False

requirements.txt CHANGED Viewed

@@ -1,9 +1,9 @@
-streamlit  #1
-pandas  #2
-numpy  #3
-torch  #4
-scikit-learn  #5
-matplotlib  #6
-plotly  #7
-requests  #8
-py3dmol  #9

+streamlit>=1.28.0
+pandas>=2.0.0
+numpy>=1.24.0
+torch>=2.0.0
+scikit-learn>=1.3.0
+matplotlib>=3.7.0
+plotly>=5.14.0
+requests>=2.28.0
+py3dmol>=2.0.0