|
|
import logging
import re
from typing import Any, Dict, List, Tuple

import requests
|
|
|
# Synonyms for each supported diagnosis label. build_terms() uses them as
# search terms and score_trial() matches them against trial titles/conditions.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": ["glioblastoma", "GBM", "glioblastoma multiforme"],
    "Diffuse midline glioma": ["diffuse midline glioma", "DMG", "H3 K27M"],
    "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
    "Astrocytoma": ["astrocytoma", "grade 2 astrocytoma", "grade 4 astrocytoma"],
    "Oligodendroglioma": ["oligodendroglioma", "1p19q codeleted"],
    "Meningioma": ["meningioma"],
    "Medulloblastoma": ["medulloblastoma"],
    "Ependymoma": ["ependymoma"],
    "Spinal cord tumor": ["spinal cord tumor", "spinal cord neoplasm"],
}

# Endpoint queried by ctgov_search_one().
API_BASE = "https://clinicaltrials.gov/api/v2/studies"

# Headers applied to every request session so the client identifies itself.
UA = {"User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)"}
|
|
|
|
|
def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Build the ordered list of search terms for a diagnosis plus keywords.

    Args:
        diagnosis: Diagnosis label. A label found in ``DEFAULT_DIAG_TERMS``
            expands to its synonym list; any other value falls back to
            generic CNS-tumor terms.
        keywords: Comma-separated extra terms. ``None`` or an empty string
            contributes nothing.

    Returns:
        The diagnosis terms followed by the extra keywords, in their original
        order, with blank entries and repeated terms removed.
    """
    fallback = ["brain tumor", "spinal cord tumor", "CNS tumor"]
    candidates = list(DEFAULT_DIAG_TERMS.get(diagnosis, fallback))
    candidates.extend(part.strip() for part in (keywords or "").split(","))

    # A keyword that repeats a default synonym would otherwise be searched
    # twice. Terms are compared ignoring case, the same way mentions() treats
    # them; the first spelling seen is the one kept.
    terms: List[str] = []
    seen = set()
    for term in candidates:
        folded = term.casefold()
        if term and folded not in seen:
            seen.add(folded)
            terms.append(term)
    return terms
|
|
|
|
|
def ctgov_search_one(
    term: str,
    statuses: List[str],
    page_size: int = 100,
    max_pages: int = 5,
) -> List[Dict[str, Any]]:
    """Fetch up to ``max_pages`` pages of studies matching one search term.

    Args:
        term: Value sent as the ``query.term`` parameter.
        statuses: Overall-status values, joined with commas into
            ``filter.overallStatus``. When empty or ``None`` the filter is
            left out of the request instead of being sent blank.
        page_size: Value sent as ``pageSize``.
        max_pages: Maximum number of page requests; ``0`` or ``None`` makes
            no request at all.

    Returns:
        The ``studies`` entries of every fetched page, in response order.

    Raises:
        requests.HTTPError: If any page responds with an error status.
    """
    base_params: Dict[str, Any] = {"query.term": term}
    if statuses:
        base_params["filter.overallStatus"] = ",".join(statuses)
    base_params["pageSize"] = page_size

    collected: List[Dict[str, Any]] = []
    page_token = None
    # The context manager closes the session (and its pooled connections)
    # on every exit path, including when a request raises.
    with requests.Session() as session:
        session.headers.update(UA)
        for _ in range(max_pages or 0):
            params = dict(base_params)
            if page_token:
                params["pageToken"] = page_token
            response = session.get(API_BASE, params=params, timeout=30)
            response.raise_for_status()
            payload = response.json()
            studies = payload.get("studies", [])
            if not studies:
                break
            collected.extend(studies)
            # No token means the API has no further page for this query.
            page_token = payload.get("nextPageToken")
            if not page_token:
                break
    return collected
|
|
|
|
|
def fetch_all_terms(terms: List[str], statuses: List[str], page_size=100, max_pages=5) -> List[Dict[str, Any]]:
    """Search every term and merge the results, de-duplicated by NCT id.

    Args:
        terms: Search terms; each one is passed to ``ctgov_search_one``.
        statuses: Overall-status filter forwarded unchanged.
        page_size: Page size forwarded unchanged.
        max_pages: Page limit per term, forwarded unchanged.

    Returns:
        One entry per distinct ``nctId``, in first-seen order. Studies with
        no ``nctId`` are all kept.

    Only ``requests.HTTPError`` is tolerated (the term is skipped and a
    warning is logged); any other exception from the search propagates.
    """
    log = logging.getLogger(__name__)
    unique: Dict[Any, Dict[str, Any]] = {}
    for term in terms:
        try:
            studies = ctgov_search_one(term, statuses, page_size=page_size, max_pages=max_pages)
        except requests.HTTPError as exc:
            # Best effort: one rejected term must not discard the others,
            # but the failure should still be visible.
            log.warning("ClinicalTrials.gov search for %r failed: %s", term, exc)
            continue
        for study in studies:
            ident = (study.get("protocolSection", {}) or {}).get("identificationModule", {}) or {}
            # Without an NCT id there is nothing to match on, so fall back to
            # the object's identity, which keeps every such study.
            key = ident.get("nctId") or id(study)
            unique.setdefault(key, study)
    return list(unique.values())
|
|
|
|
|
def mentions(txt: str, term: str) -> bool:
    """Return True if ``term`` occurs in ``txt`` as a whole term, ignoring case.

    "Whole term" means the match is not glued to a letter, digit or
    underscore on either side, so ``"astrocytoma"`` does not match inside
    ``"astrocytomas"``.

    Args:
        txt: Text to search; ``None`` or empty never matches.
        term: Literal term to look for (regex metacharacters are escaped).
            Surrounding whitespace is ignored; an empty term never matches.
    """
    needle = (term or "").strip()
    if not needle or not txt:
        return False
    # Lookarounds instead of \b: \b requires a word character on one side,
    # so it can never match next to a term that starts or ends with
    # punctuation such as "(R132H)".
    pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
    return re.search(pattern, txt, re.I) is not None
|
|
|
|
|
def as_text(obj: Any) -> str:
    """Flatten a JSON-like value into plain text.

    Rules, applied recursively:
      * ``None`` becomes ``""``.
      * A dict holding ``textblock``, ``textBlock`` or ``value`` (checked in
        that order) is represented by that entry alone; a falsy entry gives
        ``""``.
      * Any other dict is its non-``None`` values joined with spaces.
      * A list is its items joined with ``"; "``.
      * Everything else goes through ``str()``.

    Nested containers are flattened too, so the result never contains the
    brackets or quotes of a Python ``repr``.
    """
    if obj is None:
        return ""
    if isinstance(obj, dict):
        for key in ("textblock", "textBlock", "value"):
            if key in obj:
                # A present-but-falsy entry yields "" rather than falling
                # through to the other keys.
                return as_text(obj[key] or "")
        return " ".join(as_text(value) for value in obj.values() if value is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(item) for item in obj)
    return str(obj)
|
|
|
|
|
# How many of each unit make up one year; used to express ages in whole years.
_AGE_UNITS_PER_YEAR = (
    ("year", 1),
    ("month", 12),
    ("week", 52),
    ("day", 365),
    ("hour", 365 * 24),
    ("minute", 365 * 24 * 60),
)


def parse_age_to_int(v: Any):
    """Parse an age value into whole years.

    Args:
        v: ``None``, a number, a dict with a ``value`` entry, or text such as
            ``"18 Years"`` or ``"6 Months"``.

    Returns:
        The age in whole years (rounded down), or ``None`` when ``v`` is
        ``None``, contains no digits, or is a non-finite float. Text whose
        unit is missing or unrecognised is taken to be in years.
    """
    if v is None:
        return None
    if isinstance(v, dict):
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # NaN and infinity have no integer value.
            return None

    match = re.search(r"(\d+)\s*([A-Za-z]*)", str(v))
    if not match:
        return None
    amount = int(match.group(1))
    unit = match.group(2).lower()
    # Sub-year units must be converted: "6 Months" is an age of 0 years,
    # not 6, when compared with a patient's age in years.
    for prefix, per_year in _AGE_UNITS_PER_YEAR:
        if unit.startswith(prefix):
            return amount // per_year
    return amount
|
|
|
|
|
def ensure_list(v: Any):
    """Return ``v`` as a list.

    A list is returned as-is (same object), ``None`` becomes an empty list,
    and any other value is wrapped in a one-element list.
    """
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
|
|
|
|
|
def _to_number(value: Any):
    """Return ``value`` as a float, or ``None`` when it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score how well a trial fits a patient intake.

    Args:
        t: Study record; its ``protocolSection`` is read. ``None`` and
            missing or null modules are treated as empty.
        intake: Patient details read from the keys ``age``, ``kps``,
            ``prior_bev``, ``setting``, ``keywords`` (comma-separated) and
            ``diagnosis``. ``None`` is treated as empty. ``age`` and ``kps``
            may be numbers or numeric strings; anything else counts as
            unknown.

    Returns:
        ``(score, reasons)``. The score is clamped to 0..100 and built from:
        +30 diagnosis term in a condition or the brief title; +8 phase 2;
        +12 phase 3; -30 for age below the minimum and -30 for age above the
        maximum; -15 when criteria mention "ECOG 0-1" and KPS is unknown or
        below 80; -10 when criteria mention "Karnofsky" and KPS is unknown
        or below 70; -25 when prior bevacizumab is excluded and the patient
        had it; +8 when the setting matches; +3 per matching keyword.
        ``reasons`` lists the human-readable explanations in the order the
        rules fired.
    """
    intake = intake or {}
    # Coerce once so the comparisons below cannot raise on string input.
    age = _to_number(intake.get("age"))
    kps = _to_number(intake.get("kps"))
    prior_bev = bool(intake.get("prior_bev", False))
    setting = intake.get("setting") or ""
    diagnosis = intake.get("diagnosis") or ""
    keywords = [k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()]

    if diagnosis in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis]
    elif diagnosis and diagnosis != "Other":
        # Free-text diagnosis: match on the label itself.
        diag_terms = [diagnosis]
    else:
        diag_terms = ["brain tumor", "CNS tumor", "spinal cord tumor"]

    ps = (t or {}).get("protocolSection") or {}
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        # Fall back to the whole module's text when no criteria field exists.
        crit = as_text(elig.get("eligibilityCriteria") or elig.get("criteria") or elig)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = elig

    # "or {}" also covers modules that are present but null.
    design = ps.get("designModule") or {}
    conditions_module = ps.get("conditionsModule") or {}
    identification = ps.get("identificationModule") or {}
    phases = [str(p).upper() for p in ensure_list(design.get("phases"))]
    condition_texts = [as_text(c) for c in ensure_list(conditions_module.get("conditions"))]
    title = identification.get("briefTitle") or ""

    score = 0
    reasons: List[str] = []

    if any(mentions(text, term) for text in (*condition_texts, title) for term in diag_terms):
        score += 30
        reasons.append(f"Matches diagnosis: {diagnosis or 'neuro-oncology'}.")

    if any("PHASE 2" in p or "PHASE2" in p for p in phases):
        score += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases):
        score += 12

    if age is not None:
        if min_age is not None and age < min_age:
            reasons.append(f"Age below minimum ({min_age}).")
            score -= 30
        if max_age is not None and age > max_age:
            reasons.append(f"Age above maximum ({max_age}).")
            score -= 30

    # An unknown KPS is penalised the same as one that is too low.
    if mentions(crit, "ECOG 0-1") and (kps is None or kps < 80):
        score -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")
    if mentions(crit, "Karnofsky") and (kps is None or kps < 70):
        score -= 10
        reasons.append("Requires KPS ≥70.")

    if prior_bev and mentions(crit, "no prior bevacizumab"):
        score -= 25
        reasons.append("Excludes prior bevacizumab.")

    if setting == "Recurrent" and mentions(crit, "recurrent"):
        score += 8
    if setting == "Newly diagnosed" and (mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")):
        score += 8

    for keyword in keywords:
        if mentions(title, keyword) or mentions(crit, keyword):
            score += 3

    return max(0, min(100, score)), reasons
|
|
|
def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields.

    Args:
        study: Study record; its ``protocolSection`` is read. ``None`` and
            missing or null modules are treated as empty.

    Returns:
        A dict of strings with the keys ``title`` (official title, else
        brief title), ``nct``, ``status`` (underscores replaced by spaces,
        title-cased), ``phases`` and ``conditions`` (comma-joined),
        ``sponsor`` (lead sponsor name) and ``city_country`` (taken from the
        first listed location). Any value that is unavailable is ``""``.
    """
    protocol = (study or {}).get("protocolSection") or {}
    ident = protocol.get("identificationModule") or {}
    status_module = protocol.get("statusModule") or {}
    design = protocol.get("designModule") or {}
    conditions_module = protocol.get("conditionsModule") or {}
    sponsors = protocol.get("sponsorCollaboratorsModule") or {}
    contacts = protocol.get("contactsLocationsModule") or {}

    title = (ident.get("officialTitle") or ident.get("briefTitle") or "").strip()
    nct = (ident.get("nctId") or "").strip()

    status_raw = (status_module.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title()

    # str() keeps the join from failing on a non-string entry.
    phases = ", ".join(str(p) for p in ensure_list(design.get("phases")))
    conditions = ", ".join(str(c) for c in ensure_list(conditions_module.get("conditions")))

    lead = sponsors.get("leadSponsor") or {}
    sponsor = (lead.get("name") or "").strip() if isinstance(lead, dict) else ""

    city_country = ""
    locations = ensure_list(contacts.get("locations"))
    first = locations[0] if locations else None
    if isinstance(first, dict):
        # API v2 location objects name these fields "city" and "country";
        # the "location*" spellings are still accepted as a fallback.
        city = (first.get("city") or first.get("locationCity") or "").strip()
        country = (first.get("country") or first.get("locationCountry") or "").strip()
        city_country = ", ".join(part for part in (city, country) if part)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }
|
|
|