Improve space validation and layout
Browse files- .gitignore +2 -0
- app.py +124 -96
- requirements.txt +1 -1
- validator.py +17 -18
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
app.py
CHANGED
|
@@ -44,7 +44,6 @@ CSS = """
|
|
| 44 |
--page-text: #0f172a;
|
| 45 |
--page-muted: #526075;
|
| 46 |
--page-line: rgba(15, 23, 42, 0.12);
|
| 47 |
-
--page-surface: rgba(255, 255, 255, 0.78);
|
| 48 |
--page-surface-strong: #ffffff;
|
| 49 |
}
|
| 50 |
|
|
@@ -81,11 +80,20 @@ textarea {
|
|
| 81 |
|
| 82 |
.page-shell {
|
| 83 |
margin-top: 26px;
|
| 84 |
-
padding: 36px 48px 42px;
|
| 85 |
background: #ffffff;
|
| 86 |
border: 1px solid rgba(15, 23, 42, 0.08);
|
| 87 |
border-radius: 22px;
|
| 88 |
box-shadow: 0 18px 48px rgba(15, 23, 42, 0.05);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
|
| 91 |
.hero {
|
|
@@ -201,15 +209,23 @@ textarea {
|
|
| 201 |
}
|
| 202 |
|
| 203 |
.main-form {
|
| 204 |
-
padding-right:
|
| 205 |
}
|
| 206 |
|
| 207 |
.side-notes {
|
| 208 |
-
padding-left:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
}
|
| 210 |
|
| 211 |
.caption {
|
| 212 |
-
margin-top:
|
| 213 |
color: var(--page-muted);
|
| 214 |
font-size: 0.93rem;
|
| 215 |
line-height: 1.6;
|
|
@@ -224,11 +240,15 @@ textarea {
|
|
| 224 |
}
|
| 225 |
|
| 226 |
.results-shell {
|
| 227 |
-
margin-top:
|
| 228 |
padding-top: 22px;
|
| 229 |
border-top: 1px solid var(--page-line);
|
| 230 |
}
|
| 231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
.action-row {
|
| 233 |
margin-top: 10px;
|
| 234 |
}
|
|
@@ -330,23 +350,17 @@ textarea {
|
|
| 330 |
color: var(--page-text) !important;
|
| 331 |
}
|
| 332 |
|
| 333 |
-
.link-list a {
|
| 334 |
-
color: #1346a2;
|
| 335 |
-
text-decoration: none;
|
| 336 |
-
font-weight: 600;
|
| 337 |
-
}
|
| 338 |
-
|
| 339 |
-
.link-list a:hover {
|
| 340 |
-
text-decoration: underline;
|
| 341 |
-
}
|
| 342 |
-
|
| 343 |
@media (max-width: 900px) {
|
| 344 |
.gradio-container {
|
| 345 |
padding: 22px 18px 42px !important;
|
| 346 |
}
|
| 347 |
|
| 348 |
.page-shell {
|
| 349 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
}
|
| 351 |
|
| 352 |
.hero {
|
|
@@ -410,6 +424,9 @@ your_submission.zip
|
|
| 410 |
├── task_info.json
|
| 411 |
├── data/
|
| 412 |
├── related_work/
|
|
|
|
|
|
|
|
|
|
| 413 |
└── target_study/
|
| 414 |
├── checklist.json
|
| 415 |
├── paper.pdf
|
|
@@ -495,6 +512,7 @@ def validate_submission(
|
|
| 495 |
return None, '', '## Validation failed\n\n- Please upload a zip file.', '{}', gr.update(interactive=False), ''
|
| 496 |
|
| 497 |
domain = resolve_domain(suggested_domain, custom_domain)
|
|
|
|
| 498 |
metadata = SubmissionMetadata(
|
| 499 |
domain=domain,
|
| 500 |
submitter=submitter,
|
|
@@ -505,10 +523,10 @@ def validate_submission(
|
|
| 505 |
)
|
| 506 |
|
| 507 |
try:
|
| 508 |
-
existing_ids = list_existing_task_ids(repo_id=DEFAULT_REPO_ID, token=
|
| 509 |
assigned_task_id = allocate_next_task_id(domain, existing_ids)
|
| 510 |
prepared = validate_and_prepare_submission(archive_path, metadata, assigned_task_id)
|
| 511 |
-
pr_ready = bool(
|
| 512 |
return (
|
| 513 |
prepared.to_state(),
|
| 514 |
prepared.assigned_task_id,
|
|
@@ -539,18 +557,22 @@ def validate_submission(
|
|
| 539 |
|
| 540 |
def create_pr(state: dict | None):
|
| 541 |
if not state:
|
| 542 |
-
return '## PR creation failed\n\n- Validate a submission first.'
|
| 543 |
|
| 544 |
prepared = PreparedSubmission.from_state(state)
|
|
|
|
| 545 |
try:
|
| 546 |
-
commit_info = create_dataset_pr(prepared, repo_id=DEFAULT_REPO_ID, token=
|
| 547 |
pr_url = commit_info.pr_url or commit_info.commit_url
|
| 548 |
-
|
| 549 |
'## PR created',
|
| 550 |
'',
|
| 551 |
f'- Task ID: `{prepared.assigned_task_id}`',
|
| 552 |
f'- PR: {pr_url}',
|
| 553 |
])
|
|
|
|
|
|
|
|
|
|
| 554 |
finally:
|
| 555 |
cleanup_work_dir(prepared.work_dir)
|
| 556 |
|
|
@@ -562,85 +584,91 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
|
|
| 562 |
gr.HTML(build_hero_html())
|
| 563 |
|
| 564 |
with gr.Group(elem_classes=['page-shell']):
|
| 565 |
-
with gr.Row(
|
| 566 |
-
with gr.Column(scale=
|
| 567 |
-
gr.HTML(
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
show_label=False,
|
| 585 |
container=False,
|
| 586 |
)
|
| 587 |
-
|
| 588 |
-
gr.
|
| 589 |
-
|
| 590 |
-
placeholder='e.g. Robotics or Robot-Learning',
|
| 591 |
show_label=False,
|
| 592 |
container=False,
|
| 593 |
)
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
gr.
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
create_pr_btn = gr.Button('Create Dataset PR', interactive=False, elem_classes=['secondary-button'])
|
| 631 |
-
|
| 632 |
-
with gr.Column(elem_classes=['section-copy', 'results-shell']):
|
| 633 |
-
gr.HTML(field_label_html('Final task ID (assigned automatically)'))
|
| 634 |
-
assigned_task_id = gr.Textbox(
|
| 635 |
-
interactive=False,
|
| 636 |
-
show_label=False,
|
| 637 |
-
container=False,
|
| 638 |
-
)
|
| 639 |
-
gr.Markdown(final_task_help_html())
|
| 640 |
-
validation_md = gr.Markdown()
|
| 641 |
-
gr.HTML(field_label_html('Validation report'))
|
| 642 |
-
validation_report = gr.Code(language='json', show_label=False, container=False)
|
| 643 |
-
pr_md = gr.Markdown()
|
| 644 |
|
| 645 |
archive.upload(fn=handle_archive_upload, inputs=[archive], outputs=[archive_state, archive_notice])
|
| 646 |
|
|
@@ -659,7 +687,7 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
|
|
| 659 |
],
|
| 660 |
outputs=[state, assigned_task_id, validation_md, validation_report, create_pr_btn, pr_md],
|
| 661 |
)
|
| 662 |
-
create_pr_btn.click(fn=create_pr, inputs=[state], outputs=[pr_md])
|
| 663 |
|
| 664 |
|
| 665 |
if __name__ == '__main__':
|
|
|
|
| 44 |
--page-text: #0f172a;
|
| 45 |
--page-muted: #526075;
|
| 46 |
--page-line: rgba(15, 23, 42, 0.12);
|
|
|
|
| 47 |
--page-surface-strong: #ffffff;
|
| 48 |
}
|
| 49 |
|
|
|
|
| 80 |
|
| 81 |
.page-shell {
|
| 82 |
margin-top: 26px;
|
|
|
|
| 83 |
background: #ffffff;
|
| 84 |
border: 1px solid rgba(15, 23, 42, 0.08);
|
| 85 |
border-radius: 22px;
|
| 86 |
box-shadow: 0 18px 48px rgba(15, 23, 42, 0.05);
|
| 87 |
+
overflow: hidden;
|
| 88 |
+
padding: 34px 0 40px;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.page-shell-content {
|
| 92 |
+
gap: 0 !important;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.shell-spacer {
|
| 96 |
+
min-width: 44px !important;
|
| 97 |
}
|
| 98 |
|
| 99 |
.hero {
|
|
|
|
| 209 |
}
|
| 210 |
|
| 211 |
.main-form {
|
| 212 |
+
padding-right: 0;
|
| 213 |
}
|
| 214 |
|
| 215 |
.side-notes {
|
| 216 |
+
padding-left: 0;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.main-form > div {
|
| 220 |
+
padding-right: 18px !important;
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
.side-notes > div {
|
| 224 |
+
padding-left: 18px !important;
|
| 225 |
}
|
| 226 |
|
| 227 |
.caption {
|
| 228 |
+
margin-top: 8px;
|
| 229 |
color: var(--page-muted);
|
| 230 |
font-size: 0.93rem;
|
| 231 |
line-height: 1.6;
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
.results-shell {
|
| 243 |
+
margin-top: 0;
|
| 244 |
padding-top: 22px;
|
| 245 |
border-top: 1px solid var(--page-line);
|
| 246 |
}
|
| 247 |
|
| 248 |
+
.results-shell > div {
|
| 249 |
+
margin-top: 26px;
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
.action-row {
|
| 253 |
margin-top: 10px;
|
| 254 |
}
|
|
|
|
| 350 |
color: var(--page-text) !important;
|
| 351 |
}
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
@media (max-width: 900px) {
|
| 354 |
.gradio-container {
|
| 355 |
padding: 22px 18px 42px !important;
|
| 356 |
}
|
| 357 |
|
| 358 |
.page-shell {
|
| 359 |
+
padding: 24px 0 30px;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.shell-spacer {
|
| 363 |
+
min-width: 18px !important;
|
| 364 |
}
|
| 365 |
|
| 366 |
.hero {
|
|
|
|
| 424 |
├── task_info.json
|
| 425 |
├── data/
|
| 426 |
├── related_work/
|
| 427 |
+
│ ├── paper_000.pdf
|
| 428 |
+
│ ├── paper_001.pdf
|
| 429 |
+
│ └── ...
|
| 430 |
└── target_study/
|
| 431 |
├── checklist.json
|
| 432 |
├── paper.pdf
|
|
|
|
| 512 |
return None, '', '## Validation failed\n\n- Please upload a zip file.', '{}', gr.update(interactive=False), ''
|
| 513 |
|
| 514 |
domain = resolve_domain(suggested_domain, custom_domain)
|
| 515 |
+
token = load_hf_token()
|
| 516 |
metadata = SubmissionMetadata(
|
| 517 |
domain=domain,
|
| 518 |
submitter=submitter,
|
|
|
|
| 523 |
)
|
| 524 |
|
| 525 |
try:
|
| 526 |
+
existing_ids = list_existing_task_ids(repo_id=DEFAULT_REPO_ID, token=token)
|
| 527 |
assigned_task_id = allocate_next_task_id(domain, existing_ids)
|
| 528 |
prepared = validate_and_prepare_submission(archive_path, metadata, assigned_task_id)
|
| 529 |
+
pr_ready = bool(token)
|
| 530 |
return (
|
| 531 |
prepared.to_state(),
|
| 532 |
prepared.assigned_task_id,
|
|
|
|
| 557 |
|
| 558 |
def create_pr(state: dict | None):
|
| 559 |
if not state:
|
| 560 |
+
return None, gr.update(interactive=False), '## PR creation failed\n\n- Validate a submission first.'
|
| 561 |
|
| 562 |
prepared = PreparedSubmission.from_state(state)
|
| 563 |
+
token = load_hf_token()
|
| 564 |
try:
|
| 565 |
+
commit_info = create_dataset_pr(prepared, repo_id=DEFAULT_REPO_ID, token=token)
|
| 566 |
pr_url = commit_info.pr_url or commit_info.commit_url
|
| 567 |
+
message = '\n'.join([
|
| 568 |
'## PR created',
|
| 569 |
'',
|
| 570 |
f'- Task ID: `{prepared.assigned_task_id}`',
|
| 571 |
f'- PR: {pr_url}',
|
| 572 |
])
|
| 573 |
+
return None, gr.update(interactive=False), message
|
| 574 |
+
except Exception as exc:
|
| 575 |
+
return None, gr.update(interactive=False), build_failure_markdown(str(exc).strip() or 'Unknown PR creation error')
|
| 576 |
finally:
|
| 577 |
cleanup_work_dir(prepared.work_dir)
|
| 578 |
|
|
|
|
| 584 |
gr.HTML(build_hero_html())
|
| 585 |
|
| 586 |
with gr.Group(elem_classes=['page-shell']):
|
| 587 |
+
with gr.Row():
|
| 588 |
+
with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
|
| 589 |
+
gr.HTML('')
|
| 590 |
+
with gr.Column(scale=30, min_width=0, elem_classes=['page-shell-content']):
|
| 591 |
+
with gr.Row(elem_classes=['section-row']):
|
| 592 |
+
with gr.Column(scale=7, elem_classes=['section-copy', 'main-form']):
|
| 593 |
+
gr.HTML(field_label_html('Task ZIP archive'))
|
| 594 |
+
with gr.Row(elem_classes=['upload-row']):
|
| 595 |
+
archive = gr.UploadButton(
|
| 596 |
+
'Select ZIP file',
|
| 597 |
+
file_types=['.zip'],
|
| 598 |
+
file_count='single',
|
| 599 |
+
type='filepath',
|
| 600 |
+
variant='secondary',
|
| 601 |
+
elem_classes=['upload-button'],
|
| 602 |
+
)
|
| 603 |
+
archive_notice = gr.Markdown('No ZIP file selected yet.', elem_classes=['upload-status'])
|
| 604 |
+
with gr.Row():
|
| 605 |
+
with gr.Column():
|
| 606 |
+
gr.HTML(field_label_html('Suggested domain'))
|
| 607 |
+
suggested_domain = gr.Dropdown(
|
| 608 |
+
choices=list(DOMAINS),
|
| 609 |
+
value='Astronomy',
|
| 610 |
+
show_label=False,
|
| 611 |
+
container=False,
|
| 612 |
+
)
|
| 613 |
+
with gr.Column():
|
| 614 |
+
gr.HTML(field_label_html('Custom domain (optional)'))
|
| 615 |
+
custom_domain = gr.Textbox(
|
| 616 |
+
placeholder='e.g. Robotics or Robot-Learning',
|
| 617 |
+
show_label=False,
|
| 618 |
+
container=False,
|
| 619 |
+
)
|
| 620 |
+
gr.Markdown(
|
| 621 |
+
'<div class="caption">Use the custom field if your task does not belong to the suggested list. '
|
| 622 |
+
'If the custom field is filled, it overrides the suggested domain and becomes the prefix of the final task ID.</div>'
|
| 623 |
+
)
|
| 624 |
+
gr.HTML(field_label_html('Submitter name or HF username'))
|
| 625 |
+
submitter = gr.Textbox(
|
| 626 |
+
placeholder='e.g. your-hf-handle',
|
| 627 |
show_label=False,
|
| 628 |
container=False,
|
| 629 |
)
|
| 630 |
+
gr.HTML(field_label_html('Contact email'))
|
| 631 |
+
email = gr.Textbox(
|
| 632 |
+
placeholder='name@example.com',
|
|
|
|
| 633 |
show_label=False,
|
| 634 |
container=False,
|
| 635 |
)
|
| 636 |
+
gr.HTML(field_label_html('Target paper title'))
|
| 637 |
+
paper_title = gr.Textbox(show_label=False, container=False)
|
| 638 |
+
gr.HTML(field_label_html('Target paper URL or DOI'))
|
| 639 |
+
paper_url = gr.Textbox(
|
| 640 |
+
placeholder='https://... or DOI',
|
| 641 |
+
show_label=False,
|
| 642 |
+
container=False,
|
| 643 |
+
)
|
| 644 |
+
gr.HTML(field_label_html('Optional notes for reviewers'))
|
| 645 |
+
notes = gr.Textbox(
|
| 646 |
+
lines=4,
|
| 647 |
+
placeholder='Anything maintainers should know about licensing, preprocessing, or provenance.',
|
| 648 |
+
show_label=False,
|
| 649 |
+
container=False,
|
| 650 |
+
)
|
| 651 |
+
with gr.Column(scale=5, elem_classes=['section-copy', 'side-notes']):
|
| 652 |
+
gr.Markdown(submission_guide_markdown(), elem_classes=['subtle-block'])
|
| 653 |
+
|
| 654 |
+
with gr.Row(elem_classes=['action-row']):
|
| 655 |
+
validate_btn = gr.Button('Validate ZIP', variant='primary', elem_classes=['primary-button'])
|
| 656 |
+
create_pr_btn = gr.Button('Create Dataset PR', interactive=False, elem_classes=['secondary-button'])
|
| 657 |
+
|
| 658 |
+
with gr.Column(elem_classes=['section-copy', 'results-shell']):
|
| 659 |
+
gr.HTML(field_label_html('Final task ID (assigned automatically)'))
|
| 660 |
+
assigned_task_id = gr.Textbox(
|
| 661 |
+
interactive=False,
|
| 662 |
+
show_label=False,
|
| 663 |
+
container=False,
|
| 664 |
+
)
|
| 665 |
+
gr.Markdown(final_task_help_html())
|
| 666 |
+
validation_md = gr.Markdown()
|
| 667 |
+
gr.HTML(field_label_html('Validation report'))
|
| 668 |
+
validation_report = gr.Code(language='json', show_label=False, container=False)
|
| 669 |
+
pr_md = gr.Markdown()
|
| 670 |
+
with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
|
| 671 |
+
gr.HTML('')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
|
| 673 |
archive.upload(fn=handle_archive_upload, inputs=[archive], outputs=[archive_state, archive_notice])
|
| 674 |
|
|
|
|
| 687 |
],
|
| 688 |
outputs=[state, assigned_task_id, validation_md, validation_report, create_pr_btn, pr_md],
|
| 689 |
)
|
| 690 |
+
create_pr_btn.click(fn=create_pr, inputs=[state], outputs=[state, create_pr_btn, pr_md])
|
| 691 |
|
| 692 |
|
| 693 |
if __name__ == '__main__':
|
requirements.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
huggingface_hub>=
|
|
|
|
| 1 |
+
huggingface_hub>=1.3.0,<2
|
validator.py
CHANGED
|
@@ -317,6 +317,10 @@ def validate_task_dir(
|
|
| 317 |
errors.append(str(exc))
|
| 318 |
return errors
|
| 319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
if tuple(sorted(task_info.keys())) != EXPECTED_TASK_INFO_KEYS:
|
| 321 |
errors.append(f'task_info.json keys mismatch: {sorted(task_info.keys())}')
|
| 322 |
|
|
@@ -326,10 +330,12 @@ def validate_task_dir(
|
|
| 326 |
errors.append('task_info.json field `data` must be a list')
|
| 327 |
task_info['data'] = []
|
| 328 |
|
| 329 |
-
covered_files: set[Path] = set()
|
| 330 |
declared_paths: set[str] = set()
|
| 331 |
for idx, item in enumerate(task_info['data']):
|
| 332 |
prefix = f'task_info.data[{idx}]'
|
|
|
|
|
|
|
|
|
|
| 333 |
if tuple(sorted(item.keys())) != EXPECTED_DATA_ITEM_KEYS:
|
| 334 |
errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
|
| 335 |
continue
|
|
@@ -354,27 +360,16 @@ def validate_task_dir(
|
|
| 354 |
if not target.exists():
|
| 355 |
errors.append(f'{prefix}.path does not exist: {data_path}')
|
| 356 |
continue
|
| 357 |
-
if target.
|
| 358 |
-
covered_files.add(target)
|
| 359 |
-
elif target.is_dir():
|
| 360 |
nested_files = {p for p in target.rglob('*') if p.is_file()}
|
| 361 |
if not nested_files:
|
| 362 |
errors.append(f'{prefix}.path points to an empty directory: {data_path}')
|
| 363 |
-
|
| 364 |
-
else:
|
| 365 |
errors.append(f'{prefix}.path is neither file nor directory: {data_path}')
|
| 366 |
description = item.get('description', '')
|
| 367 |
if any(token in description for token in STALE_TOKENS):
|
| 368 |
errors.append(f'{prefix}.description still contains stale source paths or legacy directories')
|
| 369 |
|
| 370 |
-
actual_data_files = _iter_visible_files(data_dir) if data_dir.exists() else set()
|
| 371 |
-
uncovered = sorted(actual_data_files - covered_files)
|
| 372 |
-
if uncovered:
|
| 373 |
-
errors.append('data/ contains undeclared files: ' + ', '.join(rel(p, task_dir) for p in uncovered[:20]))
|
| 374 |
-
missing_backing = sorted(covered_files - actual_data_files)
|
| 375 |
-
if missing_backing:
|
| 376 |
-
errors.append('declared data coverage points outside data/: ' + ', '.join(rel(p, task_dir) for p in missing_backing[:20]))
|
| 377 |
-
|
| 378 |
related_entries = sorted(related_dir.iterdir(), key=lambda p: p.name) if related_dir.exists() else []
|
| 379 |
related_files = [p for p in related_entries if p.is_file()]
|
| 380 |
related_dirs = [p for p in related_entries if p.is_dir()]
|
|
@@ -400,13 +395,20 @@ def validate_task_dir(
|
|
| 400 |
errors.append(str(exc))
|
| 401 |
return errors
|
| 402 |
|
| 403 |
-
if not isinstance(checklist, list)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
errors.append('checklist.json must be a non-empty list')
|
| 405 |
checklist = []
|
| 406 |
|
| 407 |
referenced_images: set[str] = set()
|
| 408 |
for idx, item in enumerate(checklist):
|
| 409 |
prefix = f'checklist[{idx}]'
|
|
|
|
|
|
|
|
|
|
| 410 |
if tuple(sorted(item.keys())) != EXPECTED_CHECKLIST_ITEM_KEYS:
|
| 411 |
errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
|
| 412 |
continue
|
|
@@ -437,10 +439,7 @@ def validate_task_dir(
|
|
| 437 |
referenced_images.add(path_value)
|
| 438 |
|
| 439 |
actual_image_files = {str(p.relative_to(target_dir)) for p in _iter_visible_files(images_dir)} if images_dir.exists() else set()
|
| 440 |
-
extra_images = sorted(actual_image_files - referenced_images)
|
| 441 |
missing_images = sorted(referenced_images - actual_image_files)
|
| 442 |
-
if extra_images:
|
| 443 |
-
errors.append('target_study/images contains unreferenced files: ' + ', '.join(extra_images[:20]))
|
| 444 |
if missing_images:
|
| 445 |
errors.append('checklist image references are missing from target_study/images: ' + ', '.join(missing_images[:20]))
|
| 446 |
|
|
|
|
| 317 |
errors.append(str(exc))
|
| 318 |
return errors
|
| 319 |
|
| 320 |
+
if not isinstance(task_info, dict):
|
| 321 |
+
errors.append('task_info.json root must be a JSON object')
|
| 322 |
+
return errors
|
| 323 |
+
|
| 324 |
if tuple(sorted(task_info.keys())) != EXPECTED_TASK_INFO_KEYS:
|
| 325 |
errors.append(f'task_info.json keys mismatch: {sorted(task_info.keys())}')
|
| 326 |
|
|
|
|
| 330 |
errors.append('task_info.json field `data` must be a list')
|
| 331 |
task_info['data'] = []
|
| 332 |
|
|
|
|
| 333 |
declared_paths: set[str] = set()
|
| 334 |
for idx, item in enumerate(task_info['data']):
|
| 335 |
prefix = f'task_info.data[{idx}]'
|
| 336 |
+
if not isinstance(item, dict):
|
| 337 |
+
errors.append(f'{prefix} must be an object')
|
| 338 |
+
continue
|
| 339 |
if tuple(sorted(item.keys())) != EXPECTED_DATA_ITEM_KEYS:
|
| 340 |
errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
|
| 341 |
continue
|
|
|
|
| 360 |
if not target.exists():
|
| 361 |
errors.append(f'{prefix}.path does not exist: {data_path}')
|
| 362 |
continue
|
| 363 |
+
if target.is_dir():
|
|
|
|
|
|
|
| 364 |
nested_files = {p for p in target.rglob('*') if p.is_file()}
|
| 365 |
if not nested_files:
|
| 366 |
errors.append(f'{prefix}.path points to an empty directory: {data_path}')
|
| 367 |
+
elif not target.is_file():
|
|
|
|
| 368 |
errors.append(f'{prefix}.path is neither file nor directory: {data_path}')
|
| 369 |
description = item.get('description', '')
|
| 370 |
if any(token in description for token in STALE_TOKENS):
|
| 371 |
errors.append(f'{prefix}.description still contains stale source paths or legacy directories')
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
related_entries = sorted(related_dir.iterdir(), key=lambda p: p.name) if related_dir.exists() else []
|
| 374 |
related_files = [p for p in related_entries if p.is_file()]
|
| 375 |
related_dirs = [p for p in related_entries if p.is_dir()]
|
|
|
|
| 395 |
errors.append(str(exc))
|
| 396 |
return errors
|
| 397 |
|
| 398 |
+
if not isinstance(checklist, list):
|
| 399 |
+
errors.append('checklist.json root must be a list')
|
| 400 |
+
return errors
|
| 401 |
+
|
| 402 |
+
if not checklist:
|
| 403 |
errors.append('checklist.json must be a non-empty list')
|
| 404 |
checklist = []
|
| 405 |
|
| 406 |
referenced_images: set[str] = set()
|
| 407 |
for idx, item in enumerate(checklist):
|
| 408 |
prefix = f'checklist[{idx}]'
|
| 409 |
+
if not isinstance(item, dict):
|
| 410 |
+
errors.append(f'{prefix} must be an object')
|
| 411 |
+
continue
|
| 412 |
if tuple(sorted(item.keys())) != EXPECTED_CHECKLIST_ITEM_KEYS:
|
| 413 |
errors.append(f'{prefix} keys mismatch: {sorted(item.keys())}')
|
| 414 |
continue
|
|
|
|
| 439 |
referenced_images.add(path_value)
|
| 440 |
|
| 441 |
actual_image_files = {str(p.relative_to(target_dir)) for p in _iter_visible_files(images_dir)} if images_dir.exists() else set()
|
|
|
|
| 442 |
missing_images = sorted(referenced_images - actual_image_files)
|
|
|
|
|
|
|
| 443 |
if missing_images:
|
| 444 |
errors.append('checklist image references are missing from target_study/images: ' + ', '.join(missing_images[:20]))
|
| 445 |
|