CoCoOne committed on
Commit
ddd1f42
·
1 Parent(s): b073ba6

Improve space validation and layout

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +124 -96
  3. requirements.txt +1 -1
  4. validator.py +17 -18
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
app.py CHANGED
@@ -44,7 +44,6 @@ CSS = """
44
  --page-text: #0f172a;
45
  --page-muted: #526075;
46
  --page-line: rgba(15, 23, 42, 0.12);
47
- --page-surface: rgba(255, 255, 255, 0.78);
48
  --page-surface-strong: #ffffff;
49
  }
50
 
@@ -81,11 +80,20 @@ textarea {
81
 
82
  .page-shell {
83
  margin-top: 26px;
84
- padding: 36px 48px 42px;
85
  background: #ffffff;
86
  border: 1px solid rgba(15, 23, 42, 0.08);
87
  border-radius: 22px;
88
  box-shadow: 0 18px 48px rgba(15, 23, 42, 0.05);
 
 
 
 
 
 
 
 
 
 
89
  }
90
 
91
  .hero {
@@ -201,15 +209,23 @@ textarea {
201
  }
202
 
203
  .main-form {
204
- padding-right: 18px;
205
  }
206
 
207
  .side-notes {
208
- padding-left: 18px;
 
 
 
 
 
 
 
 
209
  }
210
 
211
  .caption {
212
- margin-top: 4px;
213
  color: var(--page-muted);
214
  font-size: 0.93rem;
215
  line-height: 1.6;
@@ -224,11 +240,15 @@ textarea {
224
  }
225
 
226
  .results-shell {
227
- margin-top: 26px;
228
  padding-top: 22px;
229
  border-top: 1px solid var(--page-line);
230
  }
231
 
 
 
 
 
232
  .action-row {
233
  margin-top: 10px;
234
  }
@@ -330,23 +350,17 @@ textarea {
330
  color: var(--page-text) !important;
331
  }
332
 
333
- .link-list a {
334
- color: #1346a2;
335
- text-decoration: none;
336
- font-weight: 600;
337
- }
338
-
339
- .link-list a:hover {
340
- text-decoration: underline;
341
- }
342
-
343
  @media (max-width: 900px) {
344
  .gradio-container {
345
  padding: 22px 18px 42px !important;
346
  }
347
 
348
  .page-shell {
349
- padding: 26px 22px 30px;
 
 
 
 
350
  }
351
 
352
  .hero {
@@ -410,6 +424,9 @@ your_submission.zip
410
  ├── task_info.json
411
  ├── data/
412
  ├── related_work/
 
 
 
413
  └── target_study/
414
  ├── checklist.json
415
  ├── paper.pdf
@@ -495,6 +512,7 @@ def validate_submission(
495
  return None, '', '## Validation failed\n\n- Please upload a zip file.', '{}', gr.update(interactive=False), ''
496
 
497
  domain = resolve_domain(suggested_domain, custom_domain)
 
498
  metadata = SubmissionMetadata(
499
  domain=domain,
500
  submitter=submitter,
@@ -505,10 +523,10 @@ def validate_submission(
505
  )
506
 
507
  try:
508
- existing_ids = list_existing_task_ids(repo_id=DEFAULT_REPO_ID, token=load_hf_token())
509
  assigned_task_id = allocate_next_task_id(domain, existing_ids)
510
  prepared = validate_and_prepare_submission(archive_path, metadata, assigned_task_id)
511
- pr_ready = bool(load_hf_token())
512
  return (
513
  prepared.to_state(),
514
  prepared.assigned_task_id,
@@ -539,18 +557,22 @@ def validate_submission(
539
 
540
  def create_pr(state: dict | None):
541
  if not state:
542
- return '## PR creation failed\n\n- Validate a submission first.'
543
 
544
  prepared = PreparedSubmission.from_state(state)
 
545
  try:
546
- commit_info = create_dataset_pr(prepared, repo_id=DEFAULT_REPO_ID, token=load_hf_token())
547
  pr_url = commit_info.pr_url or commit_info.commit_url
548
- return '\n'.join([
549
  '## PR created',
550
  '',
551
  f'- Task ID: `{prepared.assigned_task_id}`',
552
  f'- PR: {pr_url}',
553
  ])
 
 
 
554
  finally:
555
  cleanup_work_dir(prepared.work_dir)
556
 
@@ -562,85 +584,91 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
562
  gr.HTML(build_hero_html())
563
 
564
  with gr.Group(elem_classes=['page-shell']):
565
- with gr.Row(elem_classes=['section-row']):
566
- with gr.Column(scale=7, elem_classes=['section-copy', 'main-form']):
567
- gr.HTML(field_label_html('Task ZIP archive'))
568
- with gr.Row(elem_classes=['upload-row']):
569
- archive = gr.UploadButton(
570
- 'Select ZIP file',
571
- file_types=['.zip'],
572
- file_count='single',
573
- type='filepath',
574
- variant='secondary',
575
- elem_classes=['upload-button'],
576
- )
577
- archive_notice = gr.Markdown('No ZIP file selected yet.', elem_classes=['upload-status'])
578
- with gr.Row():
579
- with gr.Column():
580
- gr.HTML(field_label_html('Suggested domain'))
581
- suggested_domain = gr.Dropdown(
582
- choices=list(DOMAINS),
583
- value='Astronomy',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  show_label=False,
585
  container=False,
586
  )
587
- with gr.Column():
588
- gr.HTML(field_label_html('Custom domain (optional)'))
589
- custom_domain = gr.Textbox(
590
- placeholder='e.g. Robotics or Robot-Learning',
591
  show_label=False,
592
  container=False,
593
  )
594
- gr.Markdown(
595
- '<div class="caption">Use the custom field if your task does not belong to the suggested list. '
596
- 'If the custom field is filled, it overrides the suggested domain and becomes the prefix of the final task ID.</div>'
597
- )
598
- gr.HTML(field_label_html('Submitter name or HF username'))
599
- submitter = gr.Textbox(
600
- placeholder='e.g. your-hf-handle',
601
- show_label=False,
602
- container=False,
603
- )
604
- gr.HTML(field_label_html('Contact email'))
605
- email = gr.Textbox(
606
- placeholder='name@example.com',
607
- show_label=False,
608
- container=False,
609
- )
610
- gr.HTML(field_label_html('Target paper title'))
611
- paper_title = gr.Textbox(show_label=False, container=False)
612
- gr.HTML(field_label_html('Target paper URL or DOI'))
613
- paper_url = gr.Textbox(
614
- placeholder='https://... or DOI',
615
- show_label=False,
616
- container=False,
617
- )
618
- gr.HTML(field_label_html('Optional notes for reviewers'))
619
- notes = gr.Textbox(
620
- lines=4,
621
- placeholder='Anything maintainers should know about licensing, preprocessing, or provenance.',
622
- show_label=False,
623
- container=False,
624
- )
625
- with gr.Column(scale=5, elem_classes=['section-copy', 'side-notes']):
626
- gr.Markdown(submission_guide_markdown(), elem_classes=['subtle-block'])
627
-
628
- with gr.Row(elem_classes=['action-row']):
629
- validate_btn = gr.Button('Validate ZIP', variant='primary', elem_classes=['primary-button'])
630
- create_pr_btn = gr.Button('Create Dataset PR', interactive=False, elem_classes=['secondary-button'])
631
-
632
- with gr.Column(elem_classes=['section-copy', 'results-shell']):
633
- gr.HTML(field_label_html('Final task ID (assigned automatically)'))
634
- assigned_task_id = gr.Textbox(
635
- interactive=False,
636
- show_label=False,
637
- container=False,
638
- )
639
- gr.Markdown(final_task_help_html())
640
- validation_md = gr.Markdown()
641
- gr.HTML(field_label_html('Validation report'))
642
- validation_report = gr.Code(language='json', show_label=False, container=False)
643
- pr_md = gr.Markdown()
644
 
645
  archive.upload(fn=handle_archive_upload, inputs=[archive], outputs=[archive_state, archive_notice])
646
 
@@ -659,7 +687,7 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
659
  ],
660
  outputs=[state, assigned_task_id, validation_md, validation_report, create_pr_btn, pr_md],
661
  )
662
- create_pr_btn.click(fn=create_pr, inputs=[state], outputs=[pr_md])
663
 
664
 
665
  if __name__ == '__main__':
 
44
  --page-text: #0f172a;
45
  --page-muted: #526075;
46
  --page-line: rgba(15, 23, 42, 0.12);
 
47
  --page-surface-strong: #ffffff;
48
  }
49
 
 
80
 
81
  .page-shell {
82
  margin-top: 26px;
 
83
  background: #ffffff;
84
  border: 1px solid rgba(15, 23, 42, 0.08);
85
  border-radius: 22px;
86
  box-shadow: 0 18px 48px rgba(15, 23, 42, 0.05);
87
+ overflow: hidden;
88
+ padding: 34px 0 40px;
89
+ }
90
+
91
+ .page-shell-content {
92
+ gap: 0 !important;
93
+ }
94
+
95
+ .shell-spacer {
96
+ min-width: 44px !important;
97
  }
98
 
99
  .hero {
 
209
  }
210
 
211
  .main-form {
212
+ padding-right: 0;
213
  }
214
 
215
  .side-notes {
216
+ padding-left: 0;
217
+ }
218
+
219
+ .main-form > div {
220
+ padding-right: 18px !important;
221
+ }
222
+
223
+ .side-notes > div {
224
+ padding-left: 18px !important;
225
  }
226
 
227
  .caption {
228
+ margin-top: 8px;
229
  color: var(--page-muted);
230
  font-size: 0.93rem;
231
  line-height: 1.6;
 
240
  }
241
 
242
  .results-shell {
243
+ margin-top: 0;
244
  padding-top: 22px;
245
  border-top: 1px solid var(--page-line);
246
  }
247
 
248
+ .results-shell > div {
249
+ margin-top: 26px;
250
+ }
251
+
252
  .action-row {
253
  margin-top: 10px;
254
  }
 
350
  color: var(--page-text) !important;
351
  }
352
 
 
 
 
 
 
 
 
 
 
 
353
  @media (max-width: 900px) {
354
  .gradio-container {
355
  padding: 22px 18px 42px !important;
356
  }
357
 
358
  .page-shell {
359
+ padding: 24px 0 30px;
360
+ }
361
+
362
+ .shell-spacer {
363
+ min-width: 18px !important;
364
  }
365
 
366
  .hero {
 
424
  ├── task_info.json
425
  ├── data/
426
  ├── related_work/
427
+ │ ├── paper_000.pdf
428
+ │ ├── paper_001.pdf
429
+ │ └── ...
430
  └── target_study/
431
  ├── checklist.json
432
  ├── paper.pdf
 
512
  return None, '', '## Validation failed\n\n- Please upload a zip file.', '{}', gr.update(interactive=False), ''
513
 
514
  domain = resolve_domain(suggested_domain, custom_domain)
515
+ token = load_hf_token()
516
  metadata = SubmissionMetadata(
517
  domain=domain,
518
  submitter=submitter,
 
523
  )
524
 
525
  try:
526
+ existing_ids = list_existing_task_ids(repo_id=DEFAULT_REPO_ID, token=token)
527
  assigned_task_id = allocate_next_task_id(domain, existing_ids)
528
  prepared = validate_and_prepare_submission(archive_path, metadata, assigned_task_id)
529
+ pr_ready = bool(token)
530
  return (
531
  prepared.to_state(),
532
  prepared.assigned_task_id,
 
557
 
558
  def create_pr(state: dict | None):
559
  if not state:
560
+ return None, gr.update(interactive=False), '## PR creation failed\n\n- Validate a submission first.'
561
 
562
  prepared = PreparedSubmission.from_state(state)
563
+ token = load_hf_token()
564
  try:
565
+ commit_info = create_dataset_pr(prepared, repo_id=DEFAULT_REPO_ID, token=token)
566
  pr_url = commit_info.pr_url or commit_info.commit_url
567
+ message = '\n'.join([
568
  '## PR created',
569
  '',
570
  f'- Task ID: `{prepared.assigned_task_id}`',
571
  f'- PR: {pr_url}',
572
  ])
573
+ return None, gr.update(interactive=False), message
574
+ except Exception as exc:
575
+ return None, gr.update(interactive=False), build_failure_markdown(str(exc).strip() or 'Unknown PR creation error')
576
  finally:
577
  cleanup_work_dir(prepared.work_dir)
578
 
 
584
  gr.HTML(build_hero_html())
585
 
586
  with gr.Group(elem_classes=['page-shell']):
587
+ with gr.Row():
588
+ with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
589
+ gr.HTML('')
590
+ with gr.Column(scale=30, min_width=0, elem_classes=['page-shell-content']):
591
+ with gr.Row(elem_classes=['section-row']):
592
+ with gr.Column(scale=7, elem_classes=['section-copy', 'main-form']):
593
+ gr.HTML(field_label_html('Task ZIP archive'))
594
+ with gr.Row(elem_classes=['upload-row']):
595
+ archive = gr.UploadButton(
596
+ 'Select ZIP file',
597
+ file_types=['.zip'],
598
+ file_count='single',
599
+ type='filepath',
600
+ variant='secondary',
601
+ elem_classes=['upload-button'],
602
+ )
603
+ archive_notice = gr.Markdown('No ZIP file selected yet.', elem_classes=['upload-status'])
604
+ with gr.Row():
605
+ with gr.Column():
606
+ gr.HTML(field_label_html('Suggested domain'))
607
+ suggested_domain = gr.Dropdown(
608
+ choices=list(DOMAINS),
609
+ value='Astronomy',
610
+ show_label=False,
611
+ container=False,
612
+ )
613
+ with gr.Column():
614
+ gr.HTML(field_label_html('Custom domain (optional)'))
615
+ custom_domain = gr.Textbox(
616
+ placeholder='e.g. Robotics or Robot-Learning',
617
+ show_label=False,
618
+ container=False,
619
+ )
620
+ gr.Markdown(
621
+ '<div class="caption">Use the custom field if your task does not belong to the suggested list. '
622
+ 'If the custom field is filled, it overrides the suggested domain and becomes the prefix of the final task ID.</div>'
623
+ )
624
+ gr.HTML(field_label_html('Submitter name or HF username'))
625
+ submitter = gr.Textbox(
626
+ placeholder='e.g. your-hf-handle',
627
  show_label=False,
628
  container=False,
629
  )
630
+ gr.HTML(field_label_html('Contact email'))
631
+ email = gr.Textbox(
632
+ placeholder='name@example.com',
 
633
  show_label=False,
634
  container=False,
635
  )
636
+ gr.HTML(field_label_html('Target paper title'))
637
+ paper_title = gr.Textbox(show_label=False, container=False)
638
+ gr.HTML(field_label_html('Target paper URL or DOI'))
639
+ paper_url = gr.Textbox(
640
+ placeholder='https://... or DOI',
641
+ show_label=False,
642
+ container=False,
643
+ )
644
+ gr.HTML(field_label_html('Optional notes for reviewers'))
645
+ notes = gr.Textbox(
646
+ lines=4,
647
+ placeholder='Anything maintainers should know about licensing, preprocessing, or provenance.',
648
+ show_label=False,
649
+ container=False,
650
+ )
651
+ with gr.Column(scale=5, elem_classes=['section-copy', 'side-notes']):
652
+ gr.Markdown(submission_guide_markdown(), elem_classes=['subtle-block'])
653
+
654
+ with gr.Row(elem_classes=['action-row']):
655
+ validate_btn = gr.Button('Validate ZIP', variant='primary', elem_classes=['primary-button'])
656
+ create_pr_btn = gr.Button('Create Dataset PR', interactive=False, elem_classes=['secondary-button'])
657
+
658
+ with gr.Column(elem_classes=['section-copy', 'results-shell']):
659
+ gr.HTML(field_label_html('Final task ID (assigned automatically)'))
660
+ assigned_task_id = gr.Textbox(
661
+ interactive=False,
662
+ show_label=False,
663
+ container=False,
664
+ )
665
+ gr.Markdown(final_task_help_html())
666
+ validation_md = gr.Markdown()
667
+ gr.HTML(field_label_html('Validation report'))
668
+ validation_report = gr.Code(language='json', show_label=False, container=False)
669
+ pr_md = gr.Markdown()
670
+ with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
671
+ gr.HTML('')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
 
673
  archive.upload(fn=handle_archive_upload, inputs=[archive], outputs=[archive_state, archive_notice])
674
 
 
687
  ],
688
  outputs=[state, assigned_task_id, validation_md, validation_report, create_pr_btn, pr_md],
689
  )
690
+ create_pr_btn.click(fn=create_pr, inputs=[state], outputs=[state, create_pr_btn, pr_md])
691
 
692
 
693
  if __name__ == '__main__':
requirements.txt CHANGED
@@ -1 +1 @@
1
- huggingface_hub>=0.34.0,<1
 
1
+ huggingface_hub>=1.3.0,<2
validator.py CHANGED
@@ -317,6 +317,10 @@ def validate_task_dir(
317
  errors.append(str(exc))
318
  return errors
319
 
 
 
 
 
320
  if tuple(sorted(task_info.keys())) != EXPECTED_TASK_INFO_KEYS:
321
  errors.append(f'task_info.json keys mismatch: {sorted(task_info.keys())}')
322
 
@@ -326,10 +330,12 @@ def validate_task_dir(
326
  errors.append('task_info.json field `data` must be a list')
327
  task_info['data'] = []
328
 
329
- covered_files: set[Path] = set()
330
  declared_paths: set[str] = set()
331
  for idx, item in enumerate(task_info['data']):
332
  prefix = f'task_info.data[{idx}]'
 
 
 
333
  if tuple(sorted(item.keys())) != EXPECTED_DATA_ITEM_KEYS:
334
  errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
335
  continue
@@ -354,27 +360,16 @@ def validate_task_dir(
354
  if not target.exists():
355
  errors.append(f'{prefix}.path does not exist: {data_path}')
356
  continue
357
- if target.is_file():
358
- covered_files.add(target)
359
- elif target.is_dir():
360
  nested_files = {p for p in target.rglob('*') if p.is_file()}
361
  if not nested_files:
362
  errors.append(f'{prefix}.path points to an empty directory: {data_path}')
363
- covered_files.update(nested_files)
364
- else:
365
  errors.append(f'{prefix}.path is neither file nor directory: {data_path}')
366
  description = item.get('description', '')
367
  if any(token in description for token in STALE_TOKENS):
368
  errors.append(f'{prefix}.description still contains stale source paths or legacy directories')
369
 
370
- actual_data_files = _iter_visible_files(data_dir) if data_dir.exists() else set()
371
- uncovered = sorted(actual_data_files - covered_files)
372
- if uncovered:
373
- errors.append('data/ contains undeclared files: ' + ', '.join(rel(p, task_dir) for p in uncovered[:20]))
374
- missing_backing = sorted(covered_files - actual_data_files)
375
- if missing_backing:
376
- errors.append('declared data coverage points outside data/: ' + ', '.join(rel(p, task_dir) for p in missing_backing[:20]))
377
-
378
  related_entries = sorted(related_dir.iterdir(), key=lambda p: p.name) if related_dir.exists() else []
379
  related_files = [p for p in related_entries if p.is_file()]
380
  related_dirs = [p for p in related_entries if p.is_dir()]
@@ -400,13 +395,20 @@ def validate_task_dir(
400
  errors.append(str(exc))
401
  return errors
402
 
403
- if not isinstance(checklist, list) or not checklist:
 
 
 
 
404
  errors.append('checklist.json must be a non-empty list')
405
  checklist = []
406
 
407
  referenced_images: set[str] = set()
408
  for idx, item in enumerate(checklist):
409
  prefix = f'checklist[{idx}]'
 
 
 
410
  if tuple(sorted(item.keys())) != EXPECTED_CHECKLIST_ITEM_KEYS:
411
  errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
412
  continue
@@ -437,10 +439,7 @@ def validate_task_dir(
437
  referenced_images.add(path_value)
438
 
439
  actual_image_files = {str(p.relative_to(target_dir)) for p in _iter_visible_files(images_dir)} if images_dir.exists() else set()
440
- extra_images = sorted(actual_image_files - referenced_images)
441
  missing_images = sorted(referenced_images - actual_image_files)
442
- if extra_images:
443
- errors.append('target_study/images contains unreferenced files: ' + ', '.join(extra_images[:20]))
444
  if missing_images:
445
  errors.append('checklist image references are missing from target_study/images: ' + ', '.join(missing_images[:20]))
446
 
 
317
  errors.append(str(exc))
318
  return errors
319
 
320
+ if not isinstance(task_info, dict):
321
+ errors.append('task_info.json root must be a JSON object')
322
+ return errors
323
+
324
  if tuple(sorted(task_info.keys())) != EXPECTED_TASK_INFO_KEYS:
325
  errors.append(f'task_info.json keys mismatch: {sorted(task_info.keys())}')
326
 
 
330
  errors.append('task_info.json field `data` must be a list')
331
  task_info['data'] = []
332
 
 
333
  declared_paths: set[str] = set()
334
  for idx, item in enumerate(task_info['data']):
335
  prefix = f'task_info.data[{idx}]'
336
+ if not isinstance(item, dict):
337
+ errors.append(f'{prefix} must be an object')
338
+ continue
339
  if tuple(sorted(item.keys())) != EXPECTED_DATA_ITEM_KEYS:
340
  errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
341
  continue
 
360
  if not target.exists():
361
  errors.append(f'{prefix}.path does not exist: {data_path}')
362
  continue
363
+ if target.is_dir():
 
 
364
  nested_files = {p for p in target.rglob('*') if p.is_file()}
365
  if not nested_files:
366
  errors.append(f'{prefix}.path points to an empty directory: {data_path}')
367
+ elif not target.is_file():
 
368
  errors.append(f'{prefix}.path is neither file nor directory: {data_path}')
369
  description = item.get('description', '')
370
  if any(token in description for token in STALE_TOKENS):
371
  errors.append(f'{prefix}.description still contains stale source paths or legacy directories')
372
 
 
 
 
 
 
 
 
 
373
  related_entries = sorted(related_dir.iterdir(), key=lambda p: p.name) if related_dir.exists() else []
374
  related_files = [p for p in related_entries if p.is_file()]
375
  related_dirs = [p for p in related_entries if p.is_dir()]
 
395
  errors.append(str(exc))
396
  return errors
397
 
398
+ if not isinstance(checklist, list):
399
+ errors.append('checklist.json root must be a list')
400
+ return errors
401
+
402
+ if not checklist:
403
  errors.append('checklist.json must be a non-empty list')
404
  checklist = []
405
 
406
  referenced_images: set[str] = set()
407
  for idx, item in enumerate(checklist):
408
  prefix = f'checklist[{idx}]'
409
+ if not isinstance(item, dict):
410
+ errors.append(f'{prefix} must be an object')
411
+ continue
412
  if tuple(sorted(item.keys())) != EXPECTED_CHECKLIST_ITEM_KEYS:
413
  errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
414
  continue
 
439
  referenced_images.add(path_value)
440
 
441
  actual_image_files = {str(p.relative_to(target_dir)) for p in _iter_visible_files(images_dir)} if images_dir.exists() else set()
 
442
  missing_images = sorted(referenced_images - actual_image_files)
 
 
443
  if missing_images:
444
  errors.append('checklist image references are missing from target_study/images: ' + ', '.join(missing_images[:20]))
445