m0ksh commited on
Commit
5eace46
·
verified ·
1 Parent(s): 8ec3495

Sync from GitHub (preserve manual model files)

Browse files
StreamlitApp/StreamlitApp.py CHANGED
@@ -9,11 +9,11 @@ import plotly.express as px
9
  import html as _html
10
  from sklearn.manifold import TSNE
11
 
12
- # Page features are implemented in utils so this file stays orchestration-focused.
13
  from utils.predict import load_model, predict_amp, encode_sequence
14
  from utils.analyze import aa_composition, compute_properties
15
  from utils.optimize import optimize_sequence
16
- from utils.ui_helpers import (
17
  choose_top_candidate,
18
  format_conf_percent,
19
  mutation_heatmap_html,
@@ -24,7 +24,7 @@ from utils.ui_helpers import (
24
  build_analysis_insights,
25
  build_analysis_summary_text,
26
  )
27
- from utils.peptide_extras import (
28
  KNOWN_AMPS,
29
  MAX_3D_SEQUENCE_LENGTH,
30
  COMPACT_3D_LEGEND,
 
9
  import html as _html
10
  from sklearn.manifold import TSNE
11
 
12
+ # Utils map to sidebar pages: predict / analyze / optimize / visualize / tsne, plus shared_ui.
13
  from utils.predict import load_model, predict_amp, encode_sequence
14
  from utils.analyze import aa_composition, compute_properties
15
  from utils.optimize import optimize_sequence
16
+ from utils.shared_ui import (
17
  choose_top_candidate,
18
  format_conf_percent,
19
  mutation_heatmap_html,
 
24
  build_analysis_insights,
25
  build_analysis_summary_text,
26
  )
27
+ from utils.visualize import (
28
  KNOWN_AMPS,
29
  MAX_3D_SEQUENCE_LENGTH,
30
  COMPACT_3D_LEGEND,
StreamlitApp/utils/analyze.py CHANGED
@@ -1,5 +1,4 @@
1
- # Sequence composition and physicochemical property helpers.
2
- # Mass and charge are textbook approximations for the UI, not for publishing numbers.
3
  from collections import Counter
4
 
5
  def aa_composition(sequence):
 
1
+ # Analyze page: amino acid composition and simple physicochemical properties.
 
2
  from collections import Counter
3
 
4
  def aa_composition(sequence):
StreamlitApp/utils/optimize.py CHANGED
@@ -1,5 +1,4 @@
1
  # Heuristic mutation search used by the Optimize page.
2
- # It’s greedy and uses a few residue buckets — fun to play with, not a real design pipeline.
3
  import random
4
  from utils.predict import predict_amp
5
 
 
1
  # Heuristic mutation search used by the Optimize page.
 
2
  import random
3
  from utils.predict import predict_amp
4
 
StreamlitApp/utils/predict.py CHANGED
@@ -1,5 +1,4 @@
1
- # Model loading, sequence encoding, and AMP inference helpers.
2
- # Features are flattened one-hots (length × 20), not transformer embeddings — keeps the app small and CPU-friendly.
3
  import pathlib
4
  import numpy as np
5
  import torch
 
1
+ # Predict page (and shared): load AMP model, one-hot encode, run predict_amp.
 
2
  import pathlib
3
  import numpy as np
4
  import torch
StreamlitApp/utils/rate_limit.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Optional rate limiter (not wired to a sidebar page yet).
2
+ import time
3
+ from collections import deque
4
+
5
class RateLimiter:
    """Sliding-window rate limiter for a single caller/key.

    At most ``max_calls`` calls are accepted within any window of
    ``period_seconds`` seconds. Timestamps come from ``time.monotonic()`` so
    that wall-clock adjustments (NTP, DST, manual changes) cannot shorten or
    stretch the window.
    """

    def __init__(self, max_calls: int, period_seconds: float):
        self.max_calls = max_calls
        self.period = period_seconds
        # Monotonic timestamps of accepted calls, oldest first.
        self.calls = deque()

    def allow(self) -> bool:
        """Record a call and return True if it fits in the current window."""
        now = time.monotonic()

        # Drop timestamps that have aged out of the active window.
        cutoff = now - self.period
        while self.calls and self.calls[0] <= cutoff:
            self.calls.popleft()

        if len(self.calls) < self.max_calls:
            self.calls.append(now)
            return True
        return False

    def time_until_next(self) -> float:
        """Return the wait (seconds) before another call would be allowed.

        Returns ``0.0`` when a call is allowed right now and ``inf`` when the
        limiter can never accept a call (``max_calls <= 0``).
        """
        if len(self.calls) < self.max_calls:
            return 0.0
        if not self.calls:
            # Only reachable when max_calls <= 0: nothing is ever recorded,
            # so there is no "oldest" timestamp to wait for.
            return float("inf")
        now = time.monotonic()
        oldest = self.calls[0]
        return max(0.0, (oldest + self.period) - now)
StreamlitApp/utils/shared_ui.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Shared UI: formatting, tables, analysis bullets, exports: used on several sidebar pages
2
+ import html as _html
3
+ from typing import Dict, List, Tuple, Optional
4
+
5
+ from utils.analyze import compute_properties
6
+
7
def predicted_confidence(row: Dict) -> Optional[float]:
    """Return the confidence of the *predicted* class for one result row.

    ``row["Confidence"]`` holds the AMP probability. For rows predicted as
    "AMP" that value is returned as-is; for any other prediction the
    complement ``1 - p`` is returned. Missing rows, missing values and values
    that cannot be converted to ``float`` yield ``None``.
    """
    raw_probability = row.get("Confidence") if row else None
    if raw_probability is None:
        return None

    try:
        amp_probability = float(raw_probability)
    except (TypeError, ValueError):
        return None

    predicted_amp = row.get("Prediction") == "AMP"
    return amp_probability if predicted_amp else 1.0 - amp_probability
23
+
24
+
25
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
    """Format a 0-1 probability as a percentage string rounded to ``digits``."""
    percent = round(conf_prob * 100, digits)
    return f"{percent}%"
27
+
28
+
29
def heuristic_reason_for_profile(charge: float, hydro_fraction: float) -> str:
    """Pick a one-line rationale from net charge and hydrophobic fraction.

    Charge takes priority over hydrophobicity; a generic message is returned
    when neither rule matches.
    """
    if charge > 2:
        reason = "High positive charge supports membrane disruption"
    elif 0.3 <= hydro_fraction <= 0.6:
        reason = "Balanced hydrophobicity"
    else:
        reason = "Favorable predicted profile"
    return reason
35
+
36
+
37
def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
    """Pick the most confident prediction row and attach a short rationale.

    Rows predicted as "AMP" are preferred; if there are none, every row is
    considered. Rows without a usable confidence are ignored. Returns ``None``
    when there is nothing to choose from or the winning row has no sequence.
    """
    if not predictions:
        return None

    amp_only = [row for row in predictions if row.get("Prediction") == "AMP"]
    pool = amp_only or predictions

    winner: Optional[Dict] = None
    winner_conf = -1.0
    for candidate in pool:
        conf = predicted_confidence(candidate)
        if conf is not None and conf > winner_conf:
            winner, winner_conf = candidate, conf

    if winner is None:
        return None

    sequence = winner.get("Sequence", "")
    if not sequence:
        return None

    # Physicochemical profile drives the human-readable reason.
    profile = compute_properties(sequence)
    net_charge = profile.get("Net Charge (approx.)", 0)
    hydro_fraction = profile.get("Hydrophobic Fraction", 0)

    return {
        "Sequence": sequence,
        "Prediction": winner.get("Prediction"),
        "predicted_confidence": winner_conf,
        "Reason": heuristic_reason_for_profile(net_charge, hydro_fraction),
        "Charge": net_charge,
        "Hydrophobic Fraction": hydro_fraction,
    }
75
+
76
+
77
def mutation_heatmap_html(original: str, final: str) -> str:
    """Render the final sequence as HTML with changed positions in bold red.

    Positions are compared index by index. Where the final sequence is shorter
    than the original, the original residue is shown (and marked as changed).
    Residues are HTML-escaped before being embedded.
    """
    before = original or ""
    after = final or ""

    # Monospace container keeps residues aligned position by position.
    pieces: List[str] = [
        "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
    ]
    for idx in range(max(len(before), len(after))):
        # Slicing yields "" past the end of the shorter sequence.
        old = before[idx:idx + 1]
        new = after[idx:idx + 1]
        shown = new or old
        if not shown:
            pieces.append("&nbsp;")
            continue
        escaped = _html.escape(shown)
        if old != new:
            pieces.append(f"<span style='color:#d62728; font-weight:700;'>{escaped}</span>")
        else:
            pieces.append(escaped)
    pieces.append("</div>")
    return "".join(pieces)
99
+
100
+
101
def mutation_diff_table(original: str, final: str) -> List[Dict]:
    """Return one row per position comparing original and final residues.

    Each row has a 1-based "Position", the "Original" and "Final" residue
    ("" where a sequence is shorter) and a "Changed" flag of "Yes"/"No".
    """
    before = original or ""
    after = final or ""
    span = max(len(before), len(after))

    # Slicing yields "" past the end of the shorter sequence.
    pairs = ((before[i:i + 1], after[i:i + 1]) for i in range(span))
    return [
        {
            "Position": position,
            "Original": old,
            "Final": new,
            "Changed": "Yes" if old != new else "No",
        }
        for position, (old, new) in enumerate(pairs, start=1)
    ]
118
+
119
+
120
def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
    """Return how far ``value`` lies outside ``[low, high]`` (0.0 if inside)."""
    if value < low:
        return low - value
    if low <= value <= high:
        return 0.0
    return value - high
126
+
127
+
128
def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_conf: float) -> Dict:
    """Summarise what changed between the original and optimized sequence.

    Returns the confidence delta in percentage points, the before/after net
    charge and hydrophobic fraction, and a short label for how each property
    moved. Hydrophobicity is judged by its distance to the 0.4-0.5 band.
    """

    def _charge_and_hydro(seq: str) -> Tuple[float, float]:
        # Empty sequences skip compute_properties and fall back to zeros.
        props = compute_properties(seq) if seq else {}
        charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
        return charge, props.get("Hydrophobic Fraction", 0)

    charge_before, hydro_before = _charge_and_hydro(orig_seq or "")
    charge_after, hydro_after = _charge_and_hydro(final_seq or "")

    confidence_delta = (float(final_conf) - float(orig_conf)) * 100.0

    if charge_after > charge_before:
        charge_trend = "Increased"
    elif charge_after < charge_before:
        charge_trend = "Decreased"
    else:
        charge_trend = "Same"

    band_low, band_high = 0.4, 0.5
    gap_before = _ideal_distance_to_interval(float(hydro_before), band_low, band_high)
    gap_after = _ideal_distance_to_interval(float(hydro_after), band_low, band_high)

    if gap_after < gap_before:
        hydro_trend = "Improved balance"
    elif gap_after > gap_before:
        hydro_trend = "Less optimal"
    else:
        hydro_trend = "Same"

    return {
        "delta_conf_pct": confidence_delta,
        "charge_orig": charge_before,
        "charge_final": charge_after,
        "charge_change": charge_trend,
        "hydro_orig": hydro_before,
        "hydro_final": hydro_after,
        "hydro_change": hydro_trend,
    }
172
+
173
+
174
def sequence_length_warning(seq: str) -> Optional[str]:
    """Return a warning for sequences shorter than 8 or longer than 50 residues.

    Empty input and lengths within 8-50 (inclusive) return ``None``.
    """
    if not seq:
        return None
    length = len(seq)
    if length < 8:
        return "Too short for typical AMP"
    return "Unusually long sequence" if length > 50 else None
183
+
184
+
185
def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float) -> Tuple[str, str]:
    """Return a ``(label, hex colour)`` pair summarising AMP likelihood.

    Confidence of 0.9 or more is "strong" regardless of chemistry; above 0.75
    it is "strong" only with charge >= 2 and hydrophobic fraction in 0.3-0.6.
    Anything else above 0.5 is "moderate"; the rest is "unlikely".
    """
    strong = ("Strong AMP candidate", "#2ca02c")

    if conf_prob >= 0.9:
        return strong
    if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:
        return strong
    if conf_prob <= 0.5:
        return "Unlikely AMP", "#d62728"
    return "Moderate potential", "#ff9800"
195
+
196
+
197
+ # Plain-language bullets for Analyze — rules of thumb, not a second model.
198
def build_analysis_insights(
    label: str,
    conf: float,
    comp: Dict[str, float],
    length: int,
    hydro: float,
    charge: float,
) -> List[str]:
    """Build short markdown bullets explaining a prediction (heuristics only).

    ``conf`` is the AMP probability; the headline reports the confidence of
    the predicted class. ``comp`` maps one-letter residues to fractions.
    Returns at most 14 unique bullets in the order they were produced.
    """
    amp_probability = float(conf)
    is_amp = label == "AMP"
    if is_amp:
        pred_conf = round(amp_probability * 100, 1)
    else:
        pred_conf = round((1 - amp_probability) * 100, 1)

    # Headline: predicted class plus a confidence tier (>=80 / >=60 / lower).
    if is_amp:
        if pred_conf >= 80:
            headline = f"Model: **AMP** with high confidence ({pred_conf}% on this prediction)—profile below explains typical mechanisms."
        elif pred_conf >= 60:
            headline = f"Model: **AMP** with moderate confidence ({pred_conf}%); cross-check chemistry bullets before treating it as a strong hit."
        else:
            headline = f"Model: **AMP** but low confidence ({pred_conf}%); the mechanistic notes below matter more than the label alone."
    else:
        if pred_conf >= 80:
            headline = f"Model: **Non-AMP** with high confidence ({pred_conf}% on this prediction)—below are common reasons a sequence may not behave like a classic AMP."
        elif pred_conf >= 60:
            headline = f"Model: **Non-AMP** with moderate confidence ({pred_conf}%); reasons below are typical but not exhaustive."
        else:
            headline = f"Model: **Non-AMP** with low confidence ({pred_conf}%); treat the label as tentative and read the property-based notes."
    notes: List[str] = [headline]

    polar_frac = sum(float(comp.get(aa, 0.0)) for aa in "STNQYC")
    basic_frac = sum(float(comp.get(aa, 0.0)) for aa in "KRH")

    # Weakness notes apply to Non-AMP calls and to shaky AMP calls.
    needs_weak_notes = (label == "Non-AMP") or (is_amp and pred_conf < 65)
    if needs_weak_notes:
        if charge <= 0:
            notes.append(
                "Weak or absent **positive net charge**: many AMPs rely on cationic residues to bind **anionic bacterial surfaces** (e.g. LPS, teichoic acids); near-neutral or negative peptides often lack that first electrostatic hook."
            )
        if hydro < 0.28:
            notes.append(
                "Low **hydrophobic** content: membrane insertion, pore formation, or lipid disruption is harder without a hydrophobic face or core to partition into the bilayer."
            )
        if hydro > 0.65:
            notes.append(
                "Very high **hydrophobic** content: risk of aggregation or poor **aqueous solubility** before the peptide can reach bacteria—delivery and effective concentration suffer."
            )
        if polar_frac < 0.12:
            notes.append(
                "Few **polar / H-bonding** residues (S, T, N, Q, Y, C): weaker interfacial interactions with lipids and water at the membrane—many AMP mechanisms benefit from polar positioning at the interface."
            )
        if basic_frac < 0.06 and charge < 2:
            notes.append(
                "Sparse **basic** residues (K, R, H): a hallmark of many AMPs is concentrated positive charge for initial **bacterial association**; this sequence is thin on that axis."
            )
        if length < 8:
            notes.append(
                "Very **short** length: may be too small to form a stable membrane-active structure or to span a bilayer meaningfully."
            )
        elif length > 50:
            notes.append(
                "Unusually **long** chain: folding, proteolysis, and synthesis cost can diverge from small cationic AMP archetypes."
            )

    amp_like_chemistry = charge >= 2 and 0.28 <= hydro <= 0.58

    if label == "Non-AMP" and amp_like_chemistry:
        notes.append(
            "**Note:** Charge and hydrophobic balance still look somewhat AMP-like—the model says Non-AMP, so treat this as a **disagreement** worth validating experimentally, not proof either way."
        )

    if is_amp and pred_conf >= 65:
        if amp_like_chemistry:
            notes.append(
                "**Positive charge** plus **moderate hydrophobic fraction** aligns with membrane-targeting motifs common in AMP literature."
            )
        if polar_frac >= 0.12:
            notes.append(
                "Adequate **polar** residues can help **interfacial** placement and H-bonding at the membrane."
            )

    # Composition-only notes, independent of the predicted label.
    if (comp.get("K", 0) + comp.get("R", 0) + comp.get("H", 0)) >= 0.18:
        notes.append(
            "Higher **basic** residue fraction supports **electrostatic** attraction to anionic bacterial components."
        )
    if (comp.get("C", 0) + comp.get("W", 0)) >= 0.08:
        notes.append(
            "**Cysteine / tryptophan** can contribute to membrane insertion, stacking, or oxidative chemistry depending on context."
        )

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(notes))[:14]
306
+
307
+
308
def build_analysis_summary_text(
    sequence: str,
    prediction: str,
    confidence_display: str,
    props: Dict,
    analysis_lines: List[str],
) -> str:
    """Assemble a plain-text report of a prediction and its analysis bullets.

    Property values are read from ``props`` under their display names, with
    shorter legacy keys as a fallback; length defaults to ``len(sequence)``.
    """
    length = props.get("Length", len(sequence))
    charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
    hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
    bullets = "\n".join(f"- {line}" for line in (analysis_lines or []))

    report_lines = [
        f"Sequence: {sequence}",
        f"Prediction: {prediction}",
        f"Confidence: {confidence_display}",
        f"Length: {length}",
        f"Net Charge (approx.): {charge}",
        f"Hydrophobic Fraction: {hydro}",
        "",
        "Summary:",
        bullets,
    ]
    return "\n".join(report_lines) + "\n"
328
+
StreamlitApp/utils/tsne.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # t-SNE page: optional helper embedding + scatter (StreamlitApp also runs t-SNE inline with Plotly).
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.manifold import TSNE
5
+ import streamlit as st
6
+ import torch
7
+ import numpy as np
8
+ from utils.predict import encode_sequence
9
+
10
def tsne_visualization(sequences, model):
    """Project per-sequence model activations to 2D with t-SNE and plot them.

    Each sequence is encoded with ``encode_sequence`` and passed through
    ``model.layers[0]``; the flattened outputs are embedded with t-SNE and
    shown via ``st.scatter_chart``. Nothing is returned; all output goes to
    the Streamlit page.

    At least 3 sequences are required: perplexity is ``min(30, n - 1)`` and
    values below 2 are rejected here.
    """
    sequences = list(sequences)
    n_sequences = len(sequences)

    # Validate before doing any model work, so tiny or empty inputs get a
    # clear message instead of failing inside np.vstack / TSNE.
    if n_sequences < 3:
        st.warning("Need at least 3 sequences for visualization.")
        return

    st.info("Generating embeddings... this may take a moment.")
    embeddings = []
    with torch.no_grad():
        for seq in sequences:
            x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
            # Output of the model's first layer is used as the embedding.
            # NOTE(review): assumes `model.layers` is indexable — confirm.
            emb = model.layers[0](x)
            embeddings.append(emb.numpy().flatten())

    embeddings = np.vstack(embeddings)

    # Keep perplexity below the sample count, capped at 30.
    perplexity = min(30, n_sequences - 1)

    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced = tsne.fit_transform(embeddings)
    df = pd.DataFrame(reduced, columns=["x", "y"])

    st.success("t-SNE visualization complete.")
    st.scatter_chart(df)
StreamlitApp/utils/visualize.py CHANGED
@@ -1,33 +1,674 @@
1
- # t-SNE helper uses the first linear layer as a quick embedding; main app duplicates this with Plotly inline.
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- from sklearn.manifold import TSNE
5
- import streamlit as st
6
- import torch
 
 
7
  import numpy as np
8
- from utils.predict import encode_sequence
9
-
10
- def tsne_visualization(sequences, model):
11
- # Project model embeddings into 2D and render a quick scatter plot.
12
- st.info("Generating embeddings... this may take a moment.")
13
- embeddings = []
14
- for seq in sequences:
15
- x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
16
- with torch.no_grad():
17
- # Use an early hidden layer as a compact learned representation.
18
- emb = model.layers[0](x)
19
- embeddings.append(emb.numpy().flatten())
20
-
21
- embeddings = np.vstack(embeddings)
22
-
23
- perplexity = min(30, len(sequences) - 1)
24
- if perplexity < 2:
25
- st.warning("Need at least 2 sequences for visualization.")
26
- return
27
-
28
- tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
29
- reduced = tsne.fit_transform(embeddings)
30
- df = pd.DataFrame(reduced, columns=["x", "y"])
31
-
32
- st.success("t-SNE visualization complete.")
33
- st.scatter_chart(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Visualize page: 3D (py3Dmol / Plotly), helical wheel, known-AMP similarity, map HTML, shape blurbs.
2
+ from __future__ import annotations
3
+
4
+ import csv
5
+ import math
6
+ import pathlib
7
+ from typing import Any, List, Optional, Tuple
8
+
9
  import numpy as np
10
+
11
+ # Fallback if `Data/ampData.csv` is missing (e.g. local dev without Data/).
12
+ _FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
13
+ "KWKLFKKIGAVLKVL",
14
+ "GIGKFLHSAKKFGKAFVGEIMNS",
15
+ "LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLV",
16
+ "KLFKKILKYL",
17
+ "FLPLLAGLAANFLPKIFCKITRKC",
18
+ )
19
+
20
def _amp_data_csv_path() -> pathlib.Path:
    """Return the expected location of ``Data/ampData.csv``.

    The path is built two directory levels above this module's folder
    (``parents[2]`` of the resolved file path).
    """
    module_file = pathlib.Path(__file__).resolve()
    base_dir = module_file.parents[2]
    return base_dir / "Data" / "ampData.csv"
23
+
24
+
25
def _load_known_amps_from_csv() -> List[str]:
    """Load unique, uppercased sequences labelled ``1`` from the AMP CSV.

    Falls back to the built-in list when the file is missing, has no
    ``sequence`` column, cannot be read, or yields no labelled sequences.
    First-seen order is preserved.
    """
    fallback = list(_FALLBACK_KNOWN_AMPS)

    csv_path = _amp_data_csv_path()
    if not csv_path.exists():
        return fallback

    # Dict keys give order-preserving de-duplication.
    unique = {}
    try:
        with csv_path.open(newline="", encoding="utf-8") as handle:
            reader = csv.DictReader(handle)
            header = reader.fieldnames
            if not header or "sequence" not in header:
                return fallback
            for record in reader:
                if str(record.get("label", "")).strip() != "1":
                    continue
                sequence = (record.get("sequence") or "").strip().upper()
                if sequence:
                    unique.setdefault(sequence, None)
    except Exception:
        # Best-effort load: any read/parse failure uses the fallback list.
        return fallback

    return list(unique) or fallback
54
+
55
+
56
+ # Known AMP pool for similarity search (from ampData.csv label=1, or fallback list).
57
+ KNOWN_AMPS: List[str] = _load_known_amps_from_csv()
58
+
59
+ # py3Dmol viewer: skip very long sequences (labels + sticks scale with length).
60
+ MAX_3D_SEQUENCE_LENGTH: int = 60
61
+
62
+ STRUCTURE_3D_LEGEND_MARKDOWN: str = """
63
+ **Color legend**
64
+ - **Blue:** Positively charged residues (K, R, H)
65
+ - **Red:** Negatively charged residues (D, E)
66
+ - **Green:** Hydrophobic residues (A, V, I, L, M, F, W, Y)
67
+ - **Gray:** Other / polar or unclassified residues
68
+ """
69
+
70
+ STRUCTURE_3D_INTERPRETATION_MARKDOWN: str = """
71
+ **Structural interpretation (approximation only)**
72
+
73
+ This is a **simplified helical CA trace** used to visualize how residue chemistry is arranged in 3D space — **not** an experimentally determined fold.
74
+
75
+ - **Clusters of green** often correspond to membrane-facing / hydrophobic patches.
76
+ - **Blue regions** highlight cationic residues that can promote binding to anionic bacterial surfaces.
77
+ - **Spatial separation** between hydrophobic and charged segments can suggest **amphipathic** character, common among many AMPs.
78
+
79
+ Together, these cues help discuss whether a sequence has motifs frequently associated with antimicrobial peptides — **wet-lab validation is still required**.
80
+ """
81
+
82
+ # One-letter -> three-letter (for minimal PDB lines for py3Dmol).
83
+ _ONE_TO_THREE = {
84
+ "A": "ALA",
85
+ "R": "ARG",
86
+ "N": "ASN",
87
+ "D": "ASP",
88
+ "C": "CYS",
89
+ "Q": "GLN",
90
+ "E": "GLU",
91
+ "G": "GLY",
92
+ "H": "HIS",
93
+ "I": "ILE",
94
+ "L": "LEU",
95
+ "K": "LYS",
96
+ "M": "MET",
97
+ "F": "PHE",
98
+ "P": "PRO",
99
+ "S": "SER",
100
+ "T": "THR",
101
+ "W": "TRP",
102
+ "Y": "TYR",
103
+ "V": "VAL",
104
+ }
105
+
106
+
107
def sequence_similarity(seq1: str, seq2: str) -> float:
    """Return the fraction of identical positions, relative to the longer input.

    Sequences are compared index by index without alignment; either input
    being empty gives 0.0.
    """
    if not (seq1 and seq2):
        return 0.0
    longer = max(len(seq1), len(seq2))
    identical = sum(1 for left, right in zip(seq1, seq2) if left == right)
    return identical / longer
113
+
114
+
115
def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
    """Return the known AMP closest to ``sequence`` and its similarity score.

    The query is uppercased and stripped of whitespace before comparison.
    Ties keep the earliest entry in ``KNOWN_AMPS``. Returns ``(None, 0.0)``
    for empty input or an empty reference pool.
    """
    if not sequence or not KNOWN_AMPS:
        return None, 0.0

    query = "".join(sequence.upper().split())
    if not query:
        return None, 0.0

    scored = [(sequence_similarity(query, amp), amp) for amp in KNOWN_AMPS]
    # max() returns the first maximal item, so earlier entries win ties.
    top_score, top_amp = max(scored, key=lambda pair: pair[0])
    return top_amp, top_score
130
+
131
+
132
def get_residue_color(aa: str) -> str:
    """Map a one-letter residue code (any case) to a colour category name.

    Returns "blue" for K/R/H, "red" for D/E, "green" for A/V/I/L/M/F/W/Y and
    "gray" for anything else, including empty input.
    """
    code = aa.upper() if aa else ""
    categories = (
        ("blue", ("K", "R", "H")),
        ("red", ("D", "E")),
        ("green", ("A", "V", "I", "L", "M", "F", "W", "Y")),
    )
    for color, members in categories:
        if code in members:
            return color
    return "gray"
145
+
146
+
147
def residue_color_mpl(aa: str) -> str:
    """Return the hex colour for a residue's category (gray if unrecognised)."""
    hex_by_category = {
        "blue": "#1D4ED8",
        "red": "#DC2626",
        "green": "#16A34A",
        "gray": "#57534E",
    }
    category = get_residue_color(aa)
    return hex_by_category.get(category, "#57534E")
156
+
157
+
158
+ HELIX_WHEEL_LEGEND_MARKDOWN: str = """
159
+ **Helical wheel readout**
160
+ - **Blue wedge:** cationic (K, R, H) — often important for initial membrane association.
161
+ - **Red wedge:** anionic (D, E).
162
+ - **Green wedge:** hydrophobic — often grouped on one face in amphipathic helices (membrane-facing).
163
+ - **Gray:** polar / other — may participate in solubility or hydrogen bonding.
164
+
165
+ Residues are placed using a **100° step** per position (common α-helical wheel convention). This is a **2D projection**, not a solved 3D structure.
166
+ """
167
+
168
+ # Short blurbs for compact UI expanders (Visualize Peptide page)
169
+ COMPACT_3D_LEGEND: str = """
170
+ **How to read this 3D view**
171
+ - **Plotly:** thick gray **backbone line** + colored residue markers (interactive rotation).
172
+ - **3Dmol:** gray **cylinder backbone** between Cα positions + colored spheres (same chemistry colors).
173
+ - **Blue:** positively charged residues (K, R, H)
174
+ - **Red:** negatively charged residues (D, E)
175
+ - **Green:** hydrophobic residues (A, V, I, L, M, F, W, Y)
176
+ - **Gray:** other / polar residues
177
+ - Geometry is a **helix-like approximation**, not an experimental structure.
178
+ """
179
+ COMPACT_WHEEL_LEGEND: str = """
180
+ **How to read this helical wheel**
181
+ - **Radial spokes:** residue positions around the helix (100 degrees per residue)
182
+ - **Black connectors:** sequence order (`i -> i+1`) across the wheel
183
+ - **Colored circles:** residue chemistry classes
184
+ - Color mapping matches the 3D view (**blue / red / green / gray**)
185
+ """
186
+ COMPACT_MAP_LEGEND: str = """
187
+ **How to read this sequence map**
188
+ - Uses the same residue color mapping as 3D and helical wheel
189
+ - Highlights where charged vs hydrophobic residues cluster along the sequence
190
+ - Useful for quick amphipathic pattern checks
191
+ """
192
+
193
+
194
def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
    """Draw a helical-wheel projection of ``sequence`` on a polar axis.

    Residues are placed 100 degrees apart on a single ring, joined in sequence
    order, coloured by chemistry class and labelled with their letter. The
    Matplotlib figure is returned; an empty sequence gives a titled blank
    figure.
    """
    import matplotlib.pyplot as plt
    from matplotlib import patheffects as pe

    # Whitespace-free, uppercase residues.
    residues = "".join(c for c in (sequence or "").upper() if not c.isspace())
    count = len(residues)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
    fig.patch.set_facecolor("white")
    ax.set_facecolor("#ffffff")
    if count == 0:
        ax.set_title("Helical wheel (empty sequence)", pad=12)
        return fig

    degrees = np.array([i * 100.0 for i in range(count)], dtype=float) % 360.0
    theta = np.deg2rad(degrees)
    inner_radius, ring_radius = 0.06, 0.88
    # Font and marker sizes shrink as the sequence grows.
    font_size = max(7, min(11, int(220 / max(count, 1))))
    marker_area = float(np.clip(8000.0 / max(count, 1), 130.0, 420.0))

    # First residue at the top, positions advancing clockwise.
    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Radial spokes, one per residue position.
    for angle in theta:
        ax.plot(
            [angle, angle],
            [inner_radius, ring_radius],
            color="#1a1a1a",
            linewidth=0.65,
            alpha=0.45,
            zorder=1,
        )

    # Connectors between consecutive residues (i -> i+1).
    for start, end in zip(theta[:-1], theta[1:]):
        ax.plot(
            [start, end],
            [ring_radius, ring_radius],
            color="#0a0a0a",
            linewidth=1.05,
            solid_capstyle="round",
            zorder=2,
        )

    # Residue markers sit above spokes and connectors.
    ax.scatter(
        theta,
        np.full(count, ring_radius),
        s=marker_area,
        c=[residue_color_mpl(aa) for aa in residues],
        edgecolors="#111111",
        linewidths=1.2,
        zorder=4,
    )

    # Letter labels get a white outline so they stay legible on the markers.
    for angle, aa in zip(theta, residues):
        label = ax.text(
            angle,
            ring_radius,
            aa,
            ha="center",
            va="center",
            fontsize=font_size,
            color="#0a0a0a",
            fontweight="bold",
            zorder=5,
        )
        label.set_path_effects([pe.withStroke(linewidth=2.2, foreground="white")])

    ax.set_ylim(0, 1.0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.grid(False)
    ax.set_title(
        "Helical wheel (α-helix, 100°/residue) — spokes + sequence connectors",
        pad=14,
        fontsize=11,
        color="#111111",
    )
    return fig
280
+
281
+
282
def get_residue_style(aa: str) -> str:
    """Return the inline CSS used to colour one residue in the sequence map.

    The residue code is case-insensitive, matching ``get_residue_color``:
    K/R/H are blue, D/E red, A/V/I/L/M/F/W/Y green, and everything else
    (including empty input) gray.
    """
    # Normalise case so lowercase input is not silently styled as "other".
    code = aa.upper() if aa else ""
    if code in ("K", "R", "H"):
        background = "#1D4ED8"
    elif code in ("D", "E"):
        background = "#DC2626"
    elif code in ("A", "V", "I", "L", "M", "F", "W", "Y"):
        background = "#16A34A"
    else:
        background = "#57534E"
    return f"background-color: {background}; color: #ffffff; padding: 2px 3px; border-radius: 2px;"
294
+
295
+
296
def build_importance_map_html(sequence: str) -> str:
    """Return one coloured ``<span>`` per residue, skipping whitespace.

    Residues are uppercased and HTML-escaped before being embedded.
    """
    import html as html_mod

    residues = (ch.upper() for ch in sequence if not ch.isspace())
    return "".join(
        f'<span style="{get_residue_style(aa)}">{html_mod.escape(aa)}</span>'
        for aa in residues
    )
309
+
310
+
311
def helix_coordinates(sequence: str, *, smooth: bool = False) -> np.ndarray:
    """Return an ``(n, 3)`` array of helix-like CA coordinates for a sequence.

    Whitespace is ignored. Residue ``i`` sits at angle ``i * 100`` degrees,
    on a radius of about 5 with a small sinusoidal wobble, rising 1.45 per
    residue. With ``smooth=True`` (and at least 3 residues) the trace gets two
    passes of a 3-point ``[0.2, 0.6, 0.2]`` filter with both ends held fixed.
    An empty sequence gives a ``(0, 3)`` array.
    """
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    if n == 0:
        return np.zeros((0, 3), dtype=float)

    theta_step = 100.0 * math.pi / 180.0  # 100 degrees per residue
    rise = 1.45
    coords: List[Tuple[float, float, float]] = []
    for i in range(n):
        angle = i * theta_step
        r = 5.0 + 0.12 * math.sin(i * 0.4)
        coords.append((math.cos(angle) * r, math.sin(angle) * r, i * rise))

    trace = np.array(coords, dtype=float)

    if smooth and n >= 3:
        kernel = np.array([0.2, 0.6, 0.2])
        anchors = trace[[0, -1]].copy()
        for _ in range(2):
            for axis in range(3):
                trace[:, axis] = np.convolve(trace[:, axis], kernel, mode="same")
            # mode="same" zero-pads, which drags both ends toward the origin.
            # Re-pin the ends after every pass so that error cannot spread to
            # the neighbouring residues in the next pass.
            trace[[0, -1]] = anchors

    return trace
344
+
345
+
346
def generate_helix_pdb(sequence: str, smooth: bool = False) -> str:
    # Minimal CA-only helix-like PDB for py3Dmol (coordinates only; bonds drawn via cylinders).
    #
    # Emits one ATOM record per non-whitespace character, chain "A", residues numbered from 1,
    # coordinates taken from helix_coordinates(..., smooth=smooth). Unknown one-letter codes
    # become "UNK". Returns "" for empty/None input.
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean:
        return ""

    coords = helix_coordinates(clean, smooth=smooth)
    records = []
    # One atom per residue, so the atom serial and the residue number are the same counter.
    for serial, (aa, point) in enumerate(zip(clean, coords), start=1):
        res_name = _ONE_TO_THREE.get(aa, "UNK")
        x, y, z = float(point[0]), float(point[1]), float(point[2])
        # PDB ATOM records are fixed-width; column-based parsers depend on exact positions:
        #   1-6 "ATOM  " | 7-11 serial | 13-16 atom name | 18-20 resName | 22 chain |
        #   23-26 resSeq | 31-54 x,y,z (8.3f each) | 55-60 occupancy | 61-66 B-factor |
        #   77-78 element
        records.append(
            f"ATOM  {serial:5d}  CA  {res_name:3s} A{serial:4d}    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00           C"
        )
    return "\n".join(records)
366
+
367
+
368
def residue_shape_label(aa: str) -> str:
    # Translate the residue's color category into the chemistry word used in hovers and summaries.
    fallback = "polar / other"
    wording = {
        "blue": "cationic",
        "red": "anionic",
        "green": "hydrophobic",
        "gray": fallback,
    }
    category = get_residue_color(aa)
    # Any category outside the four known colors reads as polar / other.
    return wording.get(category, fallback)
377
+
378
+
379
+ def _helical_wheel_resultant(indices: List[int]) -> float:
380
+ # Circular mean length in [0, 1]: high values mean residues cluster on one face of the wheel.
381
+ if len(indices) < 2:
382
+ return 0.0
383
+ angles = [math.radians((i * 100.0) % 360.0) for i in indices]
384
+ vx = sum(math.cos(a) for a in angles) / len(angles)
385
+ vy = sum(math.sin(a) for a in angles) / len(angles)
386
+ return float(math.hypot(vx, vy))
387
+
388
+
389
def build_shape_visual_summary(
    sequence: str,
    *,
    amp_label: Optional[str] = None,
    amp_prob: Optional[float] = None,
) -> List[str]:
    # Short bullets tying the helix/wheel geometry to AMP-relevant "shape chemistry" (heuristic).
    #
    # sequence:  peptide string; whitespace is ignored and letters are upper-cased.
    # amp_label: optional model call ("AMP" / "Non-AMP"); a model bullet is only added when
    #            both amp_label and amp_prob are given.
    # amp_prob:  optional probability; it is read as the AMP probability, so the confidence
    #            shown for any other label is 1 - amp_prob.
    # Returns at most 12 unique markdown bullets (empty list for an empty sequence).
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    if n == 0:
        return []

    lines: List[str] = [
        "This view places residues on a **helix-like CA trace** (same geometry as the wheel). "
        "It shows **how** charged, hydrophobic, and polar positions sit in space—not a solved NMR/crystal structure."
    ]

    # Classify every residue once and bucket its index by color category.
    by_color: dict = {"blue": [], "green": [], "gray": []}
    for i, aa in enumerate(clean):
        bucket = by_color.get(get_residue_color(aa))
        if bucket is not None:
            bucket.append(i)
    pos_i, hyd_i, pol_i = by_color["blue"], by_color["green"], by_color["gray"]

    f_h = len(hyd_i) / n
    f_p = len(pol_i) / n
    f_pos = len(pos_i) / n

    # Resultant length on the wheel: high means the class is concentrated on one face.
    R_h = _helical_wheel_resultant(hyd_i)
    R_k = _helical_wheel_resultant(pos_i)

    if f_h >= 0.18 and f_p >= 0.12:
        lines.append(
            "You can point to **both** a **hydrophobic** (green) and **polar / other** (gray) presence along the trace—"
            "a common ingredient for **interface** behavior (aqueous vs lipid-facing), which many AMP mechanisms exploit."
        )
    elif f_h >= 0.25 and f_p < 0.1 and f_pos < 0.1:
        # The bullet states that polar AND cationic balance are both missing, so the cationic
        # fraction has to be low too; otherwise a K/R-rich amphipathic peptide would be
        # described as lacking the very charge it has.
        lines.append(
            "The trace is **dominated by hydrophobic** (green) positions; without much polar (gray) or cationic (blue) balance, "
            "membrane engagement can be less like classic cationic AMP helices (still sequence-context dependent)."
        )
    elif f_p >= 0.35 and f_h < 0.15:
        lines.append(
            "The trace is **rich in polar / other** (gray) and light on hydrophobic (green) packing—often more soluble, "
            "but less like a compact amphipathic helix unless charge or hydrophobic content appears elsewhere."
        )

    if len(hyd_i) >= 3 and R_h >= 0.52:
        lines.append(
            "**Hydrophobic residues cluster on one side** of the helical wheel (tight arc)—consistent with an **amphipathic** "
            "helix face that could sit at the **membrane interface**."
        )
    elif len(hyd_i) >= 2 and R_h < 0.35:
        lines.append(
            "**Hydrophobic** (green) positions are **spread** around the wheel—less of a single membrane-facing stripe; "
            "some AMPs still look like this, but classic amphipathic faces are easier to see when green groups on one arc."
        )

    if len(pos_i) >= 2 and R_k >= 0.5:
        lines.append(
            "**Cationic** (blue) residues group in angular space—helpful for a **localized positive patch** toward anionic lipids, "
            "a pattern often discussed for membrane-targeting peptides."
        )

    if amp_label is not None and amp_prob is not None:
        p = float(amp_prob)
        # Confidence in the call that was made: p for AMP, 1 - p for anything else.
        pred_conf = round(p * 100, 1) if amp_label == "AMP" else round((1.0 - p) * 100, 1)
        if amp_label == "AMP" and pred_conf >= 65:
            lines.append(
                f"**Model:** AMP at **{pred_conf}%** confidence on this sequence—combined with the spatial pattern above, "
                "use the plot to argue **where** positive charge and hydrophobic bulk sit relative to each other."
            )
        elif amp_label == "Non-AMP" and pred_conf >= 65:
            lines.append(
                f"**Model:** Non-AMP at **{pred_conf}%** confidence—if the trace still **looks** amphipathic, treat that as "
                "**chemistry vs. classifier** tension worth testing in the lab, not proof of activity."
            )
        else:
            lines.append(
                f"**Model:** **{amp_label}** (about **{pred_conf}%** on that call)—read the **shape** bullets as physical intuition; "
                "they do not override the model or experiments."
            )

    # De-duplicate (order-preserving), cap length.
    return list(dict.fromkeys(lines))[:12]
479
+
480
+
481
def render_3d_plotly(
    sequence: str,
    *,
    height: int = 460,
) -> bool:
    # Draw the smoothed CA trace as an interactive Plotly 3D chart (thick backbone line plus
    # one colored marker per residue) inside Streamlit. Returns True once the chart has been
    # handed to Streamlit, False when the libraries are missing or the sequence is empty or
    # longer than MAX_3D_SEQUENCE_LENGTH.
    try:
        import plotly.graph_objects as go
        import streamlit as st
    except Exception:
        return False

    residues = "".join(ch for ch in (sequence or "").upper() if not ch.isspace())
    count = len(residues)
    if count == 0 or count > MAX_3D_SEQUENCE_LENGTH:
        return False

    trace_xyz = helix_coordinates(residues, smooth=True)
    if trace_xyz.shape[0] == 0:
        return False
    xs, ys, zs = trace_xyz[:, 0], trace_xyz[:, 1], trace_xyz[:, 2]

    marker_colors = [residue_color_mpl(aa) for aa in residues]
    hover_rows = [
        f"{pos} {aa} · {residue_shape_label(aa)}"
        for pos, aa in enumerate(residues, start=1)
    ]

    # Marker and font sizes shrink as the peptide gets longer, within fixed bounds.
    marker_size = float(np.clip(900.0 / max(count, 1), 3.5, 11.0))
    font_size = max(9, min(12, int(220 / max(count, 1))))
    # One-letter labels are only drawn for short peptides (36 residues or fewer).
    with_letters = count <= 36
    letter_anchor = "top center" if count <= 24 else "middle center"

    backbone = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode="lines",
        line=dict(color="rgba(110,110,118,0.92)", width=12),
        hoverinfo="skip",
        showlegend=False,
    )
    beads = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode="markers+text" if with_letters else "markers",
        marker=dict(
            size=marker_size,
            color=marker_colors,
            line=dict(color="#1a1a1a", width=0.8),
        ),
        text=list(residues) if with_letters else None,
        textposition=letter_anchor,
        textfont=dict(size=font_size, color="#111111"),
        customdata=hover_rows,
        hovertemplate="%{customdata}<extra></extra>",
        name="Residues",
    )

    fig = go.Figure(data=[backbone, beads])
    fig.update_layout(
        height=height,
        margin=dict(l=0, r=0, t=36, b=0),
        paper_bgcolor="#fafafa",
        title=dict(
            text="Helix-like CA trace (approximation) · drag to rotate",
            font=dict(size=13, color="#333333"),
            x=0.5,
            xanchor="center",
        ),
        scene=dict(
            aspectmode="data",
            bgcolor="#f3f4f6",
            # All three axes are hidden.
            **{axis: dict(visible=False) for axis in ("xaxis", "yaxis", "zaxis")},
        ),
        showlegend=False,
    )

    st.plotly_chart(fig, use_container_width=True)
    return True
565
+
566
+
567
def render_3d_structure(
    sequence: str,
    width: int = 500,
    height: int = 400,
    iframe_height: int = 420,
    *,
    enhanced: bool = False,
    spin: bool = False,
) -> bool:
    # Render CA-only py3Dmol structure with category coloring and optional enhanced styling/spin.
    #
    # sequence:      peptide string; whitespace is stripped and letters upper-cased.
    # width/height:  py3Dmol viewer size; iframe_height is the Streamlit component height.
    # enhanced:      smoothed trace, darker background, slightly larger cylinders/spheres/labels.
    # spin:          ask the viewer to auto-rotate.
    # Returns True when the viewer HTML was handed to Streamlit; False when the sequence is
    # empty or longer than MAX_3D_SEQUENCE_LENGTH, py3Dmol is not importable, or rendering
    # raised.
    import streamlit.components.v1 as components

    # Input sanitization keeps renderer stable across pasted FASTA/text snippets.
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean:
        return False
    if len(clean) > MAX_3D_SEQUENCE_LENGTH:
        return False
    try:
        import py3Dmol  # type: ignore
    except Exception:
        return False

    def _xyz(point) -> dict:
        # Plain-float x/y/z dict in the shape the 3Dmol shape and label specs take.
        return {"x": float(point[0]), "y": float(point[1]), "z": float(point[2])}

    try:
        coords = helix_coordinates(clean, smooth=enhanced)
        pdb_data = generate_helix_pdb(clean, smooth=enhanced)
        view = py3Dmol.view(width=width, height=height)
        view.addModel(pdb_data, "pdb")

        try:
            view.setBackgroundColor("#0f0f12" if enhanced else "#1e1e1e")
        except Exception:
            pass  # best effort: keep the viewer's default background

        # A CA-only model has no bonds to draw, so consecutive residues are joined by cylinders.
        cyl_r = 0.34 if enhanced else 0.28
        for i in range(len(coords) - 1):
            segment = {
                "start": _xyz(coords[i]),
                "end": _xyz(coords[i + 1]),
                "radius": cyl_r,
                "color": "#7a7a82",
            }
            try:
                view.addCylinder({**segment, "fromCap": 1, "toCap": 1})
            except Exception:
                # Retry without the cap options; if that fails too, skip this segment.
                try:
                    view.addCylinder(segment)
                except Exception:
                    pass

        sphere_radius = 0.36 if enhanced else 0.32
        for i, aa in enumerate(clean):
            view.setStyle(
                {"resi": i + 1},
                {"sphere": {"radius": sphere_radius, "color": get_residue_color(aa)}},
            )

        # Thin out labels on long peptides so at most max_labels are drawn.
        max_labels = 60 if enhanced else 40
        label_every = max(1, (len(clean) + max_labels - 1) // max_labels)
        fs = 10 if enhanced else 9
        for i, aa in enumerate(clean):
            if i % label_every != 0:
                continue
            try:
                view.addLabel(
                    aa,
                    {
                        # A 3Dmol label "position" is an x/y/z point, not an atom selection,
                        # so anchor each label on the CA coordinate of its residue.
                        "position": _xyz(coords[i]),
                        "backgroundColor": "#1a1a1a",
                        "fontColor": "#ffffff",
                        "fontSize": fs,
                    },
                )
            except Exception:
                pass  # a missing label must not abort the render

        view.zoomTo()

        if spin:
            try:
                view.spin(True)
            except Exception:
                # Fall back to the no-argument form; give up on spinning if that fails too.
                try:
                    sp = getattr(view, "spin", None)
                    if callable(sp):
                        sp()
                except Exception:
                    pass

        if hasattr(view, "_make_html"):
            page = view._make_html()
        else:
            page = view.write()
        components.html(page, height=iframe_height)
        return True
    except Exception:
        # Any failure here makes the function report False to the caller.
        return False
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- streamlit #1
2
- pandas #2
3
- numpy #3
4
- torch #4
5
- scikit-learn #5
6
- matplotlib #6
7
- plotly #7
8
- requests #8
9
- py3dmol #9
 
1
+ streamlit>=1.28.0
2
+ pandas>=2.0.0
3
+ numpy>=1.24.0
4
+ torch>=2.0.0
5
+ scikit-learn>=1.3.0
6
+ matplotlib>=3.7.0
7
+ plotly>=5.14.0
8
+ requests>=2.28.0
9
+ py3dmol>=2.0.0