Spaces:
Running
Running
| """ | |
| GitHub Notebook Fixer — Hugging Face Space | |
| Fixes .ipynb files so they render correctly on GitHub. | |
| Common issues fixed: | |
| 1. Missing 'state' key in metadata.widgets | |
| 2. Oversized cell outputs (GitHub has a ~1MB render limit) | |
| 3. Invalid/missing notebook metadata (kernelspec, language_info) | |
| 4. Large base64-encoded images in outputs | |
| 5. Widget metadata without required 'state' field | |
| """ | |
| import gradio as gr | |
| import json | |
| import copy | |
| import base64 | |
| import uuid | |
| import tempfile | |
| from pathlib import Path | |
| # --------------------------------------------------------------------------- | |
| # Analysis helpers | |
| # --------------------------------------------------------------------------- | |
# Size thresholds, in bytes, compared against serialized JSON sizes and
# decoded image payload lengths by the analysis and fixing passes.
MAX_OUTPUT_SIZE = 500_000  # ~500 KB per cell output is risky for GitHub
MAX_TOTAL_SIZE = 10_000_000  # ~10 MB total notebook size warning
MAX_IMAGE_SIZE = 1_000_000  # ~1 MB per embedded image
# Catalogue of the problems the analyser can report, keyed by an internal
# issue code. Each entry carries a severity ("critical", "warning" or "info"),
# a short title and a longer description; the analyser copies an entry and
# adds a "location" (and sometimes a "detail") before reporting it.
GITHUB_ISSUES = {
    "widget_state_missing": {
        "severity": "critical",
        "title": "Missing 'state' in metadata.widgets",
        "desc": (
            "GitHub requires a 'state' key inside metadata.widgets. "
            "Without it the notebook preview shows 'Invalid Notebook'."
        ),
    },
    "widgets_empty_state": {
        "severity": "info",
        "title": "metadata.widgets exists but 'state' is empty",
        "desc": "The widget state dict is present but empty — GitHub renders this fine.",
    },
    "no_kernelspec": {
        "severity": "warning",
        "title": "Missing kernelspec in metadata",
        "desc": "GitHub may not detect the notebook language correctly.",
    },
    "no_language_info": {
        "severity": "info",
        "title": "Missing language_info in metadata",
        "desc": "Minor — GitHub can usually infer the language from kernelspec.",
    },
    "oversized_output": {
        "severity": "warning",
        "title": "Cell output exceeds ~500 KB",
        "desc": "Very large outputs can cause GitHub to skip rendering the notebook.",
    },
    "oversized_notebook": {
        "severity": "critical",
        "title": "Notebook exceeds ~10 MB",
        "desc": "GitHub will refuse to render notebooks over ~10 MB.",
    },
    "large_embedded_image": {
        "severity": "warning",
        "title": "Large base64 image embedded in output",
        "desc": "Images over ~1 MB bloat the notebook and slow GitHub rendering.",
    },
    "invalid_nbformat": {
        "severity": "critical",
        "title": "Missing or invalid nbformat version",
        "desc": "GitHub needs nbformat >= 4 to render the notebook.",
    },
}
| def _sizeof(obj) -> int: | |
| """Rough byte size of a JSON-serializable object.""" | |
| return len(json.dumps(obj, ensure_ascii=False).encode()) | |
def analyze_notebook(nb: dict) -> list[dict]:
    """Return a list of issue dicts found in the notebook.

    Each issue is a copy of a ``GITHUB_ISSUES`` entry extended with a
    ``"location"`` string and, for size-related issues, a ``"detail"`` string
    giving the measured size in MB.

    Malformed structure (``metadata``, ``widgets``, ``cells`` or ``outputs``
    holding a value of the wrong JSON type) is tolerated instead of raising,
    so that a report can still be produced for hand-edited or truncated files.

    Args:
        nb: The parsed notebook (top-level JSON object).

    Returns:
        The issues found, in the order the checks run; empty if none.
    """
    issues: list[dict] = []

    # ---- nbformat version ----
    # Anything that is not an integer >= 4 (missing, string, float, bool) is
    # reported; bool is excluded explicitly because it subclasses int.
    nbf = nb.get("nbformat")
    if isinstance(nbf, bool) or not isinstance(nbf, int) or nbf < 4:
        issues.append({**GITHUB_ISSUES["invalid_nbformat"], "location": "root"})

    # ---- metadata.widgets ----
    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = {}
    widgets = meta.get("widgets")
    widget_state_key = "application/vnd.jupyter.widget-state+json"
    if isinstance(widgets, dict):
        if widget_state_key in widgets:
            ws = widgets[widget_state_key]
            if not isinstance(ws, dict) or "state" not in ws:
                issues.append({**GITHUB_ISSUES["widget_state_missing"], "location": "metadata.widgets"})
            elif not ws["state"]:
                issues.append({**GITHUB_ISSUES["widgets_empty_state"], "location": "metadata.widgets"})
        else:
            # widgets key exists but no standard widget-state key
            for key, val in widgets.items():
                if isinstance(val, dict) and "state" not in val:
                    issues.append({
                        **GITHUB_ISSUES["widget_state_missing"],
                        "location": f"metadata.widgets['{key}']",
                    })
    elif widgets is not None:
        # A non-object "widgets" value cannot contain a 'state' key at all.
        issues.append({**GITHUB_ISSUES["widget_state_missing"], "location": "metadata.widgets"})

    # ---- kernelspec / language_info ----
    if "kernelspec" not in meta:
        issues.append({**GITHUB_ISSUES["no_kernelspec"], "location": "metadata"})
    if "language_info" not in meta:
        issues.append({**GITHUB_ISSUES["no_language_info"], "location": "metadata"})

    # ---- per-cell checks ----
    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for idx, cell in enumerate(cells):
        if not isinstance(cell, dict):
            continue
        outputs = cell.get("outputs")
        if not isinstance(outputs, list):
            continue  # markdown/raw cells have no outputs
        for out in outputs:
            out_size = _sizeof(out)
            if out_size > MAX_OUTPUT_SIZE:
                issues.append({
                    **GITHUB_ISSUES["oversized_output"],
                    "location": f"cells[{idx}]",
                    "detail": f"{out_size / 1_000_000:.2f} MB",
                })
            # check base64 images
            data = out.get("data") if isinstance(out, dict) else None
            if not isinstance(data, dict):
                continue
            for mime, content in data.items():
                if not (mime.startswith("image/") and isinstance(content, str)):
                    continue
                try:
                    img_bytes = len(base64.b64decode(content, validate=False))
                except ValueError:
                    # Not base64 (binascii.Error is a ValueError), e.g. inline
                    # SVG text: fall back to the raw string length.
                    img_bytes = len(content)
                if img_bytes > MAX_IMAGE_SIZE:
                    issues.append({
                        **GITHUB_ISSUES["large_embedded_image"],
                        "location": f"cells[{idx}] ({mime})",
                        "detail": f"{img_bytes / 1_000_000:.2f} MB",
                    })

    # ---- total size ----
    total = _sizeof(nb)
    if total > MAX_TOTAL_SIZE:
        issues.append({
            **GITHUB_ISSUES["oversized_notebook"],
            "location": "entire file",
            "detail": f"{total / 1_000_000:.2f} MB",
        })
    return issues
def predict_github_render(issues: list[dict]) -> str:
    """Return a human-readable prediction of whether GitHub will render.

    Args:
        issues: Issue dicts, each carrying a ``"severity"`` key.

    Returns:
        A "will not render" message if any issue is critical, a "might render"
        message if any issue is a warning, otherwise a "should render" message.
    """
    severities = {issue["severity"] for issue in issues}
    if "critical" in severities:
        return "❌ Will NOT render on GitHub"
    if "warning" in severities:
        return "⚠️ Might render, but with issues"
    return "✅ Should render fine on GitHub"
| # --------------------------------------------------------------------------- | |
| # Fixer | |
| # --------------------------------------------------------------------------- | |
def fix_notebook(nb: dict, strip_widgets: bool = False, strip_large_outputs: bool = True) -> dict:
    """Return a fixed deep copy of the notebook; the input is not modified.

    Fixes applied:
      * ``nbformat`` is forced to 4 when missing, non-integer or below 4.
      * ``nbformat_minor`` is raised to 5 for nbformat-4 notebooks, because
        this function adds cell ids, which only exist from format 4.5 on.
      * ``metadata.widgets`` is removed (``strip_widgets=True`` or malformed)
        or every widget entry gets a ``state`` key.
      * Default Python ``kernelspec`` / ``language_info`` are added if missing.
      * Outputs larger than ``MAX_OUTPUT_SIZE`` are replaced by a short
        placeholder when ``strip_large_outputs`` is true.
      * Embedded images larger than ``MAX_IMAGE_SIZE`` are always removed.
      * Cells whose ``id`` is missing or shorter than 8 characters get a
        fresh 8-character hex id.

    Args:
        nb: The parsed notebook (top-level JSON object).
        strip_widgets: Drop ``metadata.widgets`` instead of repairing it.
        strip_large_outputs: Replace oversized outputs with a placeholder.

    Returns:
        The repaired notebook as a new dict.
    """
    nb = copy.deepcopy(nb)

    # ---- Ensure nbformat ----
    # Guard the comparison: a string or float nbformat would otherwise raise
    # TypeError on "< 4". bool is excluded because it subclasses int.
    nbformat = nb.get("nbformat")
    if isinstance(nbformat, bool) or not isinstance(nbformat, int) or nbformat < 4:
        nb["nbformat"] = 4
    minor = nb.get("nbformat_minor")
    minor_ok = isinstance(minor, int) and not isinstance(minor, bool) and minor >= 5
    if nb["nbformat"] == 4 and not minor_ok:
        nb["nbformat_minor"] = 5
    else:
        nb.setdefault("nbformat_minor", 5)

    # ---- metadata ----
    meta = nb.get("metadata")
    if not isinstance(meta, dict):
        meta = nb["metadata"] = {}

    # Fix widgets
    widgets = meta.get("widgets")
    if widgets is not None:
        if strip_widgets or not isinstance(widgets, dict):
            del meta["widgets"]
        else:
            # Add missing 'state' key to every widget-state entry (this also
            # covers the standard application/vnd.jupyter.widget-state+json key).
            for val in widgets.values():
                if isinstance(val, dict):
                    val.setdefault("state", {})

    # Ensure kernelspec
    if "kernelspec" not in meta:
        meta["kernelspec"] = {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        }
    # Ensure language_info
    if "language_info" not in meta:
        meta["language_info"] = {
            "name": "python",
            "version": "3.10.0",
        }

    # ---- Per-cell fixes ----
    cells = nb.get("cells")
    if not isinstance(cells, list):
        cells = []
    for cell in cells:
        if not isinstance(cell, dict):
            continue

        # Only touch "outputs" where it already exists: markdown and raw
        # cells must not gain an outputs list.
        outputs = cell.get("outputs")
        if isinstance(outputs, list):
            new_outputs = []
            for out in outputs:
                if not isinstance(out, dict):
                    new_outputs.append(out)
                    continue
                # Strip very large outputs if requested
                if strip_large_outputs and _sizeof(out) > MAX_OUTPUT_SIZE:
                    new_outputs.append({
                        "output_type": "stream",
                        "name": "stdout",
                        "text": ["[Output removed — too large for GitHub rendering]\n"],
                    })
                    continue
                # Always strip oversized base64 images regardless of the checkbox
                data = out.get("data")
                if isinstance(data, dict):
                    for mime in list(data):
                        payload = data[mime]
                        if not (mime.startswith("image/") and isinstance(payload, str)):
                            continue
                        try:
                            img_bytes = len(base64.b64decode(payload, validate=False))
                        except ValueError:
                            # Not base64 (binascii.Error is a ValueError).
                            img_bytes = len(payload)
                        if img_bytes > MAX_IMAGE_SIZE:
                            # Remove the key rather than leaving an empty,
                            # undecodable image behind.
                            del data[mime]
                            data.setdefault("text/plain", ["[Large image removed for GitHub compatibility]"])
                new_outputs.append(out)
            cell["outputs"] = new_outputs

        # Ensure every cell has a valid nbformat 4.5+ id (8-char hex)
        existing_id = cell.get("id")
        if not isinstance(existing_id, str) or len(existing_id) < 8:
            cell["id"] = uuid.uuid4().hex[:8]
    return nb
| # --------------------------------------------------------------------------- | |
| # Report builder | |
| # --------------------------------------------------------------------------- | |
def build_report(issues: list[dict], prediction: str) -> str:
    """Render the analysis result as a Markdown report.

    Args:
        issues: Issue dicts with ``"severity"``, ``"title"``, ``"desc"`` and
            ``"location"`` keys, plus an optional ``"detail"`` string.
        prediction: Headline shown as the report's level-2 heading.

    Returns:
        The Markdown text: the heading followed either by a "no issues" note
        or by a numbered list with one entry per issue.
    """
    lines = [f"## {prediction}\n"]
    if not issues:
        lines.append("No issues detected — this notebook looks good for GitHub!\n")
        return "\n".join(lines)

    severity_emoji = {"critical": "🔴", "warning": "🟡", "info": "🔵"}
    lines.append(f"**Found {len(issues)} issue(s):**\n")
    for number, issue in enumerate(issues, 1):
        emoji = severity_emoji.get(issue["severity"], "⚪")
        detail = f" — {issue['detail']}" if "detail" in issue else ""
        lines.append(f"{number}. {emoji} **{issue['title']}**{detail}")
        lines.append(f" *Location:* `{issue['location']}`")
        lines.append(f" {issue['desc']}\n")
    return "\n".join(lines)
| # --------------------------------------------------------------------------- | |
| # Gradio handler | |
| # --------------------------------------------------------------------------- | |
def process_notebook(file, strip_widgets: bool, strip_large_outputs: bool):
    """Main handler: analyse → predict → fix → return.

    Args:
        file: The uploaded notebook, either a filesystem path string or an
            object exposing the path as ``.name``; ``None`` if nothing was
            uploaded.
        strip_widgets: Forwarded to ``fix_notebook``.
        strip_large_outputs: Forwarded to ``fix_notebook``.

    Returns:
        A ``(report_markdown, fixed_path)`` tuple. ``fixed_path`` is the path
        of a newly written temporary ``.ipynb`` file, or ``None`` when the
        input could not be processed.
    """
    if file is None:
        return "Upload a `.ipynb` file first.", None

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    path = getattr(file, "name", file)

    # Read the notebook
    try:
        with open(path, "r", encoding="utf-8") as f:
            nb = json.load(f)
    except json.JSONDecodeError:
        return "❌ The uploaded file is not valid JSON. Are you sure it's a `.ipynb`?", None
    except Exception as e:  # UI boundary: report any read failure to the user
        return f"❌ Could not read file: {e}", None

    # Valid JSON is not necessarily a notebook: the rest of the pipeline
    # needs a top-level object.
    if not isinstance(nb, dict):
        return "❌ The uploaded file is not a notebook: the top-level value must be a JSON object.", None

    # Analyse
    issues = analyze_notebook(nb)
    prediction = predict_github_render(issues)
    report = build_report(issues, prediction)

    # Fix
    fixed_nb = fix_notebook(nb, strip_widgets=strip_widgets, strip_large_outputs=strip_large_outputs)

    # Re-analyse fixed version
    fixed_issues = analyze_notebook(fixed_nb)
    fixed_prediction = predict_github_render(fixed_issues)
    report += "\n---\n"
    report += f"### After fix: {fixed_prediction}\n"
    if fixed_issues:
        remaining = [i for i in fixed_issues if i["severity"] in ("critical", "warning")]
        if remaining:
            report += f"⚠️ {len(remaining)} issue(s) remain (may need manual attention).\n"
        else:
            report += "Only informational notes remain — notebook should render on GitHub.\n"
    else:
        report += "All issues resolved! ✅\n"

    # Write fixed notebook to temp file. The handle is written through and
    # closed by the context manager; delete=False keeps the file on disk so
    # it can be served for download after this function returns.
    with tempfile.NamedTemporaryFile(
        "w",
        encoding="utf-8",
        suffix=".ipynb",
        prefix="fixed_",
        dir=tempfile.gettempdir(),
        delete=False,
    ) as out_file:
        json.dump(fixed_nb, out_file, ensure_ascii=False, indent=1)
        out_path = out_file.name
    return report, out_path
| # --------------------------------------------------------------------------- | |
| # UI | |
| # --------------------------------------------------------------------------- | |
# Markdown shown at the top of the page.
DESCRIPTION = """
# 🔧 GitHub Notebook Fixer
**Upload a `.ipynb` file** and this tool will:
1. **Predict** whether it will render on GitHub
2. **Diagnose** all issues (missing widget state, oversized outputs, bad metadata…)
3. **Fix** the problems and return a clean `.ipynb` you can push to GitHub
### Common issues fixed
- `metadata.widgets` missing the `state` key → **"Invalid Notebook"** on GitHub
- Oversized cell outputs (>500 KB) that block rendering
- Missing `kernelspec` / `language_info` metadata
- Giant base64-encoded images bloating the file
"""
# Page layout: inputs on the left, report and download on the right, followed
# by a footer and a collapsible privacy notice.
# NOTE(review): the emoji in the labels below were restored from mis-decoded
# text; the button and accordion icons are best guesses. Confirm upstream.
with gr.Blocks(
    title="GitHub Notebook Fixer",
    theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"),
) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload .ipynb file",
                file_types=[".ipynb"],
                type="filepath",
            )
            strip_widgets = gr.Checkbox(
                label="Remove widget metadata entirely (instead of fixing it)",
                value=False,
            )
            strip_large = gr.Checkbox(
                label="Strip oversized outputs (>500 KB per cell)",
                value=True,
            )
            btn = gr.Button("🔍 Analyze & Fix", variant="primary", size="lg")
        with gr.Column(scale=2):
            report_output = gr.Markdown(label="Diagnosis Report")
            file_output = gr.File(label="Download Fixed Notebook")

    # Wire the button to the handler: (file, two checkboxes) in,
    # (report markdown, fixed file path) out.
    btn.click(
        fn=process_notebook,
        inputs=[file_input, strip_widgets, strip_large],
        outputs=[report_output, file_output],
    )
    gr.Markdown(
        '---\n*Built to solve the classic GitHub "Invalid Notebook" error. '
        "Works for Colab, Jupyter, and any nbformat-4 notebook.*"
    )
    # NOTE(review): process_notebook writes its result with delete=False, so
    # nothing in this file removes that temp file; confirm the deletion claim
    # in the text below matches how the Space is deployed.
    with gr.Accordion("🔒 Privacy & Data — your files are never stored", open=False):
        gr.Markdown("""
### How this tool works
1. You upload a `.ipynb` file — it is read **entirely in memory** on the server.
2. The tool analyses the JSON structure, fixes any issues, and writes a temporary output file.
3. You download the fixed file.
4. The temporary file is managed by the OS and is **automatically deleted** — it is never persisted beyond your session.
### What we do NOT do
- ❌ We do **not** save, log, or store your notebook file anywhere.
- ❌ We do **not** collect any personal information or usage metadata linked to your file.
- ❌ There is **no database** — nothing is written to permanent storage.
- ❌ Your notebook content is **never shared** with third parties.
### What happens to your data
Your file lives only in the server's RAM and a short-lived OS temp file for the duration of your request.
Once you close the session or the Space restarts, every trace of it is gone.
**You are the only person who ever sees your notebook.**
""")

if __name__ == "__main__":
    demo.launch()