Commit ·
bddf009
1
Parent(s): 4a31a2e
Update app.py for new hub version and download to local
Browse files
app.py
CHANGED
|
@@ -5,6 +5,8 @@ import os
|
|
| 5 |
import posixpath
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from huggingface_hub import list_repo_files
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Replace this with your actual Hugging Face repo ID
|
| 10 |
REPO_ID = "PortPy-Project/PortPy_Dataset"
|
|
@@ -32,7 +34,7 @@ def load_all_metadata(disease_site):
|
|
| 32 |
filtered_patients = patient_df[patient_df["disease_site"] == disease_site]
|
| 33 |
|
| 34 |
metadata = {}
|
| 35 |
-
for patient_id in filtered_patients["patient_id"]:
|
| 36 |
# Load structure metadata for the patient
|
| 37 |
structs = load_structure_metadata(patient_id)
|
| 38 |
# Load beam metadata for the patient
|
|
@@ -144,8 +146,7 @@ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, ma
|
|
| 144 |
"CT_Data.h5", "CT_MetaData.json",
|
| 145 |
"StructureSet_Data.h5", "StructureSet_MetaData.json",
|
| 146 |
"OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
|
| 147 |
-
"PlannerBeams.json"
|
| 148 |
-
"rt_dose_echo_imrt.dcm", "rt_plan_echo_imrt.dcm"
|
| 149 |
]
|
| 150 |
for filename in static_files:
|
| 151 |
hf_path = posixpath.join("data", patient_id, filename)
|
|
@@ -164,6 +165,32 @@ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, ma
|
|
| 164 |
if attempt == max_retries - 1:
|
| 165 |
st.error(f"Failed to download {hf_path}: {e}")
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
if planner_beam_ids:
|
| 168 |
planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
|
| 169 |
try:
|
|
@@ -239,7 +266,10 @@ def main():
|
|
| 239 |
query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)
|
| 240 |
|
| 241 |
# Checkbox: Only planner beams
|
| 242 |
-
only_planner = st.sidebar.checkbox(
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
results_df = filter_matched_data(
|
| 245 |
filtered_patients, query_ptv_vol, beam_gantry_filter,
|
|
@@ -271,21 +301,71 @@ def main():
|
|
| 271 |
# st.dataframe(pd.DataFrame(structs), use_container_width=True)
|
| 272 |
# st.subheader(f"📡 Beams for {selected_patient}")
|
| 273 |
# st.dataframe(pd.DataFrame(beams), use_container_width=True)
|
| 274 |
-
|
| 275 |
-
|
|
|
|
| 276 |
# Multi-select and download
|
| 277 |
to_download = st.sidebar.multiselect("Select Patients to Download", results_df["patient_id"].tolist())
|
| 278 |
-
local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
if st.sidebar.button("Download Selected Patients"):
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
patient_to_beams = {
|
| 282 |
-
row["patient_id"]: row["
|
|
|
|
|
|
|
| 283 |
}
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
# if st.button("Download Data"):
|
| 291 |
# patients_to_download = results_df["patient_id"].tolist()
|
|
@@ -293,4 +373,5 @@ def main():
|
|
| 293 |
# st.success("Download complete!")
|
| 294 |
|
| 295 |
if __name__ == "__main__":
|
| 296 |
-
main()
|
|
|
|
|
|
| 5 |
import posixpath
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from huggingface_hub import list_repo_files
|
| 8 |
+
import io
|
| 9 |
+
import zipfile
|
| 10 |
|
| 11 |
# Replace this with your actual Hugging Face repo ID
|
| 12 |
REPO_ID = "PortPy-Project/PortPy_Dataset"
|
|
|
|
| 34 |
filtered_patients = patient_df[patient_df["disease_site"] == disease_site]
|
| 35 |
|
| 36 |
metadata = {}
|
| 37 |
+
for patient_id in filtered_patients["patient_id"]: # TODO: limit for testing
|
| 38 |
# Load structure metadata for the patient
|
| 39 |
structs = load_structure_metadata(patient_id)
|
| 40 |
# Load beam metadata for the patient
|
|
|
|
| 146 |
"CT_Data.h5", "CT_MetaData.json",
|
| 147 |
"StructureSet_Data.h5", "StructureSet_MetaData.json",
|
| 148 |
"OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
|
| 149 |
+
"PlannerBeams.json"
|
|
|
|
| 150 |
]
|
| 151 |
for filename in static_files:
|
| 152 |
hf_path = posixpath.join("data", patient_id, filename)
|
|
|
|
| 165 |
if attempt == max_retries - 1:
|
| 166 |
st.error(f"Failed to download {hf_path}: {e}")
|
| 167 |
|
| 168 |
+
# ---------------------------------------------------------------
|
| 169 |
+
# 2. Download all DICOM files under data/<patient_id>/DicomFiles/
|
| 170 |
+
# ---------------------------------------------------------------
|
| 171 |
+
try:
|
| 172 |
+
all_files = list_repo_files(repo_id, repo_type="dataset")
|
| 173 |
+
dicom_prefix = f"data/{patient_id}/DicomFiles/"
|
| 174 |
+
dicom_files = [f for f in all_files if f.startswith(dicom_prefix)]
|
| 175 |
+
|
| 176 |
+
for hf_path in dicom_files:
|
| 177 |
+
for attempt in range(max_retries):
|
| 178 |
+
try:
|
| 179 |
+
local_path = hf_hub_download(
|
| 180 |
+
repo_id=repo_id,
|
| 181 |
+
repo_type="dataset",
|
| 182 |
+
filename=hf_path,
|
| 183 |
+
local_dir=local_dir,
|
| 184 |
+
token=token
|
| 185 |
+
)
|
| 186 |
+
downloaded_files.append(local_path)
|
| 187 |
+
break
|
| 188 |
+
except Exception as e:
|
| 189 |
+
if attempt == max_retries - 1:
|
| 190 |
+
st.error(f"Failed to download {hf_path}: {e}")
|
| 191 |
+
|
| 192 |
+
except Exception as e:
|
| 193 |
+
st.error(f"Error listing DICOM files for {patient_id}: {e}")
|
| 194 |
if planner_beam_ids:
|
| 195 |
planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
|
| 196 |
try:
|
|
|
|
| 266 |
query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)
|
| 267 |
|
| 268 |
# Checkbox: Only planner beams
|
| 269 |
+
only_planner = st.sidebar.checkbox(
|
| 270 |
+
"Show only planner beams (if selected it will download only planner beams)",
|
| 271 |
+
value=True,
|
| 272 |
+
)
|
| 273 |
|
| 274 |
results_df = filter_matched_data(
|
| 275 |
filtered_patients, query_ptv_vol, beam_gantry_filter,
|
|
|
|
| 301 |
# st.dataframe(pd.DataFrame(structs), use_container_width=True)
|
| 302 |
# st.subheader(f"📡 Beams for {selected_patient}")
|
| 303 |
# st.dataframe(pd.DataFrame(beams), use_container_width=True)
|
| 304 |
+
if "open_download_expander" not in st.session_state:
|
| 305 |
+
st.session_state["open_download_expander"] = False
|
| 306 |
+
with st.expander("Download matched patients", expanded=st.session_state["open_download_expander"]):
|
| 307 |
# Multi-select and download
|
| 308 |
to_download = st.sidebar.multiselect("Select Patients to Download", results_df["patient_id"].tolist())
|
| 309 |
+
# local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
|
| 310 |
+
# if st.sidebar.button("Download Selected Patients"):
|
| 311 |
+
# if to_download:
|
| 312 |
+
# patient_to_beams = {
|
| 313 |
+
# row["patient_id"]: row["beam_ids"] for ind, row in results_df.iterrows() if ind in to_download
|
| 314 |
+
# }
|
| 315 |
+
# for pid, beam_ids in patient_to_beams.items():
|
| 316 |
+
# download_data(REPO_ID, [pid], beam_ids=beam_ids, planner_beam_ids=False, local_dir=local_dir)
|
| 317 |
+
# st.success("Download complete!")
|
| 318 |
+
# else:
|
| 319 |
+
# st.warning("No patients selected.")
|
| 320 |
+
|
| 321 |
if st.sidebar.button("Download Selected Patients"):
|
| 322 |
+
st.session_state["open_download_expander"] = True # Force open expander
|
| 323 |
+
if not to_download:
|
| 324 |
+
st.warning("No patients selected.")
|
| 325 |
+
else:
|
| 326 |
+
progress = st.progress(0)
|
| 327 |
+
status = st.empty()
|
| 328 |
+
|
| 329 |
+
local_dir = "./downloaded"
|
| 330 |
+
os.makedirs(local_dir, exist_ok=True)
|
| 331 |
+
|
| 332 |
patient_to_beams = {
|
| 333 |
+
row["patient_id"]: row["selected_beam_ids"]
|
| 334 |
+
for _, row in results_df.iterrows()
|
| 335 |
+
if row["patient_id"] in to_download
|
| 336 |
}
|
| 337 |
+
|
| 338 |
+
total = len(patient_to_beams)
|
| 339 |
+
for i, (pid, beam_ids) in enumerate(patient_to_beams.items(), start=1):
|
| 340 |
+
status.write(f"Downloading {pid} ({i}/{total})…")
|
| 341 |
+
|
| 342 |
+
download_data(REPO_ID, [pid], beam_ids=beam_ids,
|
| 343 |
+
planner_beam_ids=only_planner,
|
| 344 |
+
local_dir=local_dir)
|
| 345 |
+
|
| 346 |
+
progress.progress(i / total)
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
status.success("All downloads complete. Preparing zip…")
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# Create zip in memory
|
| 353 |
+
buf = io.BytesIO()
|
| 354 |
+
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
|
| 355 |
+
for root, _, files in os.walk(local_dir):
|
| 356 |
+
for f in files:
|
| 357 |
+
full_path = os.path.join(root, f)
|
| 358 |
+
rel_path = os.path.relpath(full_path, local_dir)
|
| 359 |
+
zf.write(full_path, rel_path)
|
| 360 |
+
buf.seek(0)
|
| 361 |
+
|
| 362 |
+
# Offer the in-memory zip through a download button; the user must click it to save the file
|
| 363 |
+
st.download_button(
|
| 364 |
+
label="Your download is ready! Click to save.",
|
| 365 |
+
data=buf,
|
| 366 |
+
file_name="portpy_patients.zip",
|
| 367 |
+
mime="application/zip",
|
| 368 |
+
)
|
| 369 |
|
| 370 |
# if st.button("Download Data"):
|
| 371 |
# patients_to_download = results_df["patient_id"].tolist()
|
|
|
|
| 373 |
# st.success("Download complete!")
|
| 374 |
|
| 375 |
if __name__ == "__main__":
|
| 376 |
+
main()
|
| 377 |
+
# To run: streamlit run app.py
|