gourav3017 committed on
Commit
bddf009
·
1 Parent(s): 4a31a2e

Update app.py for new hub version and download to local

Browse files
Files changed (1) hide show
  1. app.py +96 -15
app.py CHANGED
@@ -5,6 +5,8 @@ import os
5
  import posixpath
6
  from huggingface_hub import hf_hub_download
7
  from huggingface_hub import list_repo_files
 
 
8
 
9
  # Replace this with your actual Hugging Face repo ID
10
  REPO_ID = "PortPy-Project/PortPy_Dataset"
@@ -32,7 +34,7 @@ def load_all_metadata(disease_site):
32
  filtered_patients = patient_df[patient_df["disease_site"] == disease_site]
33
 
34
  metadata = {}
35
- for patient_id in filtered_patients["patient_id"]:
36
  # Load structure metadata for the patient
37
  structs = load_structure_metadata(patient_id)
38
  # Load beam metadata for the patient
@@ -144,8 +146,7 @@ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, ma
144
  "CT_Data.h5", "CT_MetaData.json",
145
  "StructureSet_Data.h5", "StructureSet_MetaData.json",
146
  "OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
147
- "PlannerBeams.json",
148
- "rt_dose_echo_imrt.dcm", "rt_plan_echo_imrt.dcm"
149
  ]
150
  for filename in static_files:
151
  hf_path = posixpath.join("data", patient_id, filename)
@@ -164,6 +165,32 @@ def download_data(repo_id, patient_ids, beam_ids=None, planner_beam_ids=True, ma
164
  if attempt == max_retries - 1:
165
  st.error(f"Failed to download {hf_path}: {e}")
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  if planner_beam_ids:
168
  planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
169
  try:
@@ -239,7 +266,10 @@ def main():
239
  query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)
240
 
241
  # Checkbox: Only planner beams
242
- only_planner = st.sidebar.checkbox("Show only planner beams", value=True)
 
 
 
243
 
244
  results_df = filter_matched_data(
245
  filtered_patients, query_ptv_vol, beam_gantry_filter,
@@ -271,21 +301,71 @@ def main():
271
  # st.dataframe(pd.DataFrame(structs), use_container_width=True)
272
  # st.subheader(f"📡 Beams for {selected_patient}")
273
  # st.dataframe(pd.DataFrame(beams), use_container_width=True)
274
-
275
- with st.expander("Download matched patients"):
 
276
  # Multi-select and download
277
  to_download = st.sidebar.multiselect("Select Patients to Download", results_df["patient_id"].tolist())
278
- local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
 
 
 
 
 
 
 
 
 
 
 
279
  if st.sidebar.button("Download Selected Patients"):
280
- if to_download:
 
 
 
 
 
 
 
 
 
281
  patient_to_beams = {
282
- row["patient_id"]: row["beam_ids"] for ind, row in results_df.iterrows() if ind in to_download
 
 
283
  }
284
- for pid, beam_ids in patient_to_beams.items():
285
- download_data(REPO_ID, [pid], beam_ids=beam_ids, planner_beam_ids=False, local_dir=local_dir)
286
- st.success("Download complete!")
287
- else:
288
- st.warning("No patients selected.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
  # if st.button("Download Data"):
291
  # patients_to_download = results_df["patient_id"].tolist()
@@ -293,4 +373,5 @@ def main():
293
  # st.success("Download complete!")
294
 
295
  if __name__ == "__main__":
296
- main()
 
 
5
  import posixpath
6
  from huggingface_hub import hf_hub_download
7
  from huggingface_hub import list_repo_files
8
+ import io
9
+ import zipfile
10
 
11
  # Replace this with your actual Hugging Face repo ID
12
  REPO_ID = "PortPy-Project/PortPy_Dataset"
 
34
  filtered_patients = patient_df[patient_df["disease_site"] == disease_site]
35
 
36
  metadata = {}
37
+ for patient_id in filtered_patients["patient_id"]: # TODO: limit for testing
38
  # Load structure metadata for the patient
39
  structs = load_structure_metadata(patient_id)
40
  # Load beam metadata for the patient
 
146
  "CT_Data.h5", "CT_MetaData.json",
147
  "StructureSet_Data.h5", "StructureSet_MetaData.json",
148
  "OptimizationVoxels_Data.h5", "OptimizationVoxels_MetaData.json",
149
+ "PlannerBeams.json"
 
150
  ]
151
  for filename in static_files:
152
  hf_path = posixpath.join("data", patient_id, filename)
 
165
  if attempt == max_retries - 1:
166
  st.error(f"Failed to download {hf_path}: {e}")
167
 
168
+ # ---------------------------------------------------------------
169
+ # 2. Download all DICOM files under data/<patient_id>/DicomFiles/
170
+ # ---------------------------------------------------------------
171
+ try:
172
+ all_files = list_repo_files(repo_id, repo_type="dataset")
173
+ dicom_prefix = f"data/{patient_id}/DicomFiles/"
174
+ dicom_files = [f for f in all_files if f.startswith(dicom_prefix)]
175
+
176
+ for hf_path in dicom_files:
177
+ for attempt in range(max_retries):
178
+ try:
179
+ local_path = hf_hub_download(
180
+ repo_id=repo_id,
181
+ repo_type="dataset",
182
+ filename=hf_path,
183
+ local_dir=local_dir,
184
+ token=token
185
+ )
186
+ downloaded_files.append(local_path)
187
+ break
188
+ except Exception as e:
189
+ if attempt == max_retries - 1:
190
+ st.error(f"Failed to download {hf_path}: {e}")
191
+
192
+ except Exception as e:
193
+ st.error(f"Error listing DICOM files for {patient_id}: {e}")
194
  if planner_beam_ids:
195
  planner_file = os.path.join(local_dir, 'data', patient_id, "PlannerBeams.json")
196
  try:
 
266
  query_ptv_vol = st.sidebar.number_input("Minimum PTV volume (cc):", value=0)
267
 
268
  # Checkbox: Only planner beams
269
+ only_planner = st.sidebar.checkbox(
270
+ "Show only planner beams (if selected it will download only planner beams)",
271
+ value=True,
272
+ )
273
 
274
  results_df = filter_matched_data(
275
  filtered_patients, query_ptv_vol, beam_gantry_filter,
 
301
  # st.dataframe(pd.DataFrame(structs), use_container_width=True)
302
  # st.subheader(f"📡 Beams for {selected_patient}")
303
  # st.dataframe(pd.DataFrame(beams), use_container_width=True)
304
+ if "open_download_expander" not in st.session_state:
305
+ st.session_state["open_download_expander"] = False
306
+ with st.expander("Download matched patients", expanded=st.session_state["open_download_expander"]):
307
  # Multi-select and download
308
  to_download = st.sidebar.multiselect("Select Patients to Download", results_df["patient_id"].tolist())
309
+ # local_dir = st.sidebar.text_input("Enter local directory to download data:", value="./downloaded")
310
+ # if st.sidebar.button("Download Selected Patients"):
311
+ # if to_download:
312
+ # patient_to_beams = {
313
+ # row["patient_id"]: row["beam_ids"] for ind, row in results_df.iterrows() if ind in to_download
314
+ # }
315
+ # for pid, beam_ids in patient_to_beams.items():
316
+ # download_data(REPO_ID, [pid], beam_ids=beam_ids, planner_beam_ids=False, local_dir=local_dir)
317
+ # st.success("Download complete!")
318
+ # else:
319
+ # st.warning("No patients selected.")
320
+
321
  if st.sidebar.button("Download Selected Patients"):
322
+ st.session_state["open_download_expander"] = True # Force open expander
323
+ if not to_download:
324
+ st.warning("No patients selected.")
325
+ else:
326
+ progress = st.progress(0)
327
+ status = st.empty()
328
+
329
+ local_dir = "./downloaded"
330
+ os.makedirs(local_dir, exist_ok=True)
331
+
332
  patient_to_beams = {
333
+ row["patient_id"]: row["selected_beam_ids"]
334
+ for _, row in results_df.iterrows()
335
+ if row["patient_id"] in to_download
336
  }
337
+
338
+ total = len(patient_to_beams)
339
+ for i, (pid, beam_ids) in enumerate(patient_to_beams.items(), start=1):
340
+ status.write(f"Downloading {pid} ({i}/{total})…")
341
+
342
+ download_data(REPO_ID, [pid], beam_ids=beam_ids,
343
+ planner_beam_ids=only_planner,
344
+ local_dir=local_dir)
345
+
346
+ progress.progress(i / total)
347
+
348
+
349
+ status.success("All downloads complete. Preparing zip…")
350
+
351
+
352
+ # Create zip in memory
353
+ buf = io.BytesIO()
354
+ with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
355
+ for root, _, files in os.walk(local_dir):
356
+ for f in files:
357
+ full_path = os.path.join(root, f)
358
+ rel_path = os.path.relpath(full_path, local_dir)
359
+ zf.write(full_path, rel_path)
360
+ buf.seek(0)
361
+
362
+ # Trigger file download automatically from the SAME BUTTON CLICK
363
+ st.download_button(
364
+ label="Your download is ready! Click to save.",
365
+ data=buf,
366
+ file_name="portpy_patients.zip",
367
+ mime="application/zip",
368
+ )
369
 
370
  # if st.button("Download Data"):
371
  # patients_to_download = results_df["patient_id"].tolist()
 
373
  # st.success("Download complete!")
374
 
375
  if __name__ == "__main__":
376
+ main()
377
+ # To run: streamlit run app.py