HongzeFu committed on
Commit
59a743a
·
1 Parent(s): 2ec0072

cpu docker v1

Browse files
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
 
@@ -11,10 +11,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
11
  git \
12
  ffmpeg \
13
  libgl1 \
14
- libglvnd-dev \
15
  libglib2.0-0 \
16
  libvulkan1 \
17
- vulkan-tools \
18
  && add-apt-repository ppa:deadsnakes/ppa \
19
  && apt-get update && apt-get install -y --no-install-recommends \
20
  python3.11 \
@@ -32,7 +30,6 @@ RUN useradd -m -u 1000 user
32
 
33
  ENV PYTHONUNBUFFERED=1 \
34
  PIP_NO_CACHE_DIR=1 \
35
- NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \
36
  HOME=/home/user \
37
  PATH=/home/user/.local/bin:$PATH \
38
  OMP_NUM_THREADS=1 \
 
1
+ FROM ubuntu:22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
 
 
11
  git \
12
  ffmpeg \
13
  libgl1 \
 
14
  libglib2.0-0 \
15
  libvulkan1 \
 
16
  && add-apt-repository ppa:deadsnakes/ppa \
17
  && apt-get update && apt-get install -y --no-install-recommends \
18
  python3.11 \
 
30
 
31
  ENV PYTHONUNBUFFERED=1 \
32
  PIP_NO_CACHE_DIR=1 \
 
33
  HOME=/home/user \
34
  PATH=/home/user/.local/bin:$PATH \
35
  OMP_NUM_THREADS=1 \
README.md CHANGED
@@ -14,9 +14,9 @@ python3 gradio-web/main.py
14
 
15
  `app_file` is intentionally not set here because this is a Docker Space; the application entrypoint comes from Docker `CMD`, while `app_port: 7860` is the external port published by the Space.
16
 
17
- Local GPU Docker run:
18
 
19
  ```bash
20
- docker build -t robomme-gradio:gpu .
21
- docker run --rm --gpus all -p 7860:7860 robomme-gradio:gpu
22
  ```
 
14
 
15
  `app_file` is intentionally not set here because this is a Docker Space; the application entrypoint comes from Docker `CMD`, while `app_port: 7860` is the external port published by the Space.
16
 
17
+ Local CPU Docker run:
18
 
19
  ```bash
20
+ docker build -t robomme-gradio:cpu .
21
+ docker run --rm -p 7860:7860 robomme-gradio:cpu
22
  ```
docker-entrypoint.sh CHANGED
@@ -1,50 +1,20 @@
1
  #!/bin/sh
2
  set -eu
3
 
4
- pick_vulkan_icd() {
5
- for candidate in \
6
- /etc/vulkan/icd.d/nvidia_icd.json \
7
- /etc/vulkan/icd.d/nvidia_icd.x86_64.json \
8
- /usr/share/vulkan/icd.d/nvidia_icd.json \
9
- /usr/share/vulkan/icd.d/nvidia_icd.x86_64.json
10
- do
11
- if [ -f "$candidate" ]; then
12
- printf '%s\n' "$candidate"
13
- return 0
14
- fi
15
- done
16
- return 1
17
- }
18
-
19
- run_diagnostic() {
20
- label="$1"
21
- shift
22
- echo "[entrypoint] $label"
23
- if "$@"; then
24
- return 0
25
- else
26
- status=$?
27
- fi
28
- echo "[entrypoint] $label failed with exit code $status"
29
- return 0
30
- }
31
-
32
  if [ -z "${OMP_NUM_THREADS:-}" ]; then
33
  export OMP_NUM_THREADS=1
34
  fi
35
 
36
- if [ -z "${VK_ICD_FILENAMES:-}" ]; then
37
- if detected_icd="$(pick_vulkan_icd)"; then
38
- export VK_ICD_FILENAMES="$detected_icd"
39
- echo "[entrypoint] Using Vulkan ICD: $VK_ICD_FILENAMES"
40
- else
41
- echo "[entrypoint] Vulkan ICD file not found under /etc or /usr/share"
42
- fi
43
- else
44
- echo "[entrypoint] Respecting preset VK_ICD_FILENAMES: $VK_ICD_FILENAMES"
45
- fi
46
 
 
47
  echo "[entrypoint] OMP_NUM_THREADS=$OMP_NUM_THREADS"
48
- run_diagnostic "nvidia-smi" nvidia-smi
49
- run_diagnostic "vulkaninfo --summary" vulkaninfo --summary
 
50
  exec "$@"
 
1
  #!/bin/sh
2
  set -eu
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  if [ -z "${OMP_NUM_THREADS:-}" ]; then
5
  export OMP_NUM_THREADS=1
6
  fi
7
 
8
+ export CUDA_VISIBLE_DEVICES=-1
9
+ export NVIDIA_VISIBLE_DEVICES=void
10
+ export SAPIEN_RENDER_DEVICE=cpu
11
+ unset NVIDIA_DRIVER_CAPABILITIES
12
+ unset VK_ICD_FILENAMES
13
+ unset MUJOCO_GL
 
 
 
 
14
 
15
+ echo "[entrypoint] Starting RoboMME Gradio app in CPU-only mode"
16
  echo "[entrypoint] OMP_NUM_THREADS=$OMP_NUM_THREADS"
17
+ echo "[entrypoint] CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
18
+ echo "[entrypoint] NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES"
19
+ echo "[entrypoint] SAPIEN_RENDER_DEVICE=$SAPIEN_RENDER_DEVICE"
20
  exec "$@"
gradio-web/main.py CHANGED
@@ -15,6 +15,16 @@ SRC_DIR = PROJECT_ROOT / "src"
15
  VIDEOS_DIR = APP_DIR / "videos"
16
  TEMP_DEMOS_DIR = PROJECT_ROOT / "temp_demos"
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
 
@@ -27,6 +37,27 @@ if str(SRC_DIR) not in sys.path:
27
  sys.path.insert(0, str(SRC_DIR))
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def setup_logging() -> logging.Logger:
31
  """Configure structured logging for Spaces runtime."""
32
  level_name = "DEBUG"
@@ -116,6 +147,7 @@ def build_allowed_paths():
116
 
117
 
118
  def main():
 
119
  from ui_layout import CSS, create_ui_blocks
120
 
121
  LOGGER.info("Starting Gradio real environment entrypoint: %s", __file__)
 
15
  VIDEOS_DIR = APP_DIR / "videos"
16
  TEMP_DEMOS_DIR = PROJECT_ROOT / "temp_demos"
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
18
+ CPU_ONLY_ENV_OVERRIDES = {
19
+ "CUDA_VISIBLE_DEVICES": "-1",
20
+ "NVIDIA_VISIBLE_DEVICES": "void",
21
+ "SAPIEN_RENDER_DEVICE": "cpu",
22
+ }
23
+ CPU_ONLY_ENV_CLEAR_KEYS = (
24
+ "NVIDIA_DRIVER_CAPABILITIES",
25
+ "VK_ICD_FILENAMES",
26
+ "MUJOCO_GL",
27
+ )
28
 
29
 
30
 
 
37
  sys.path.insert(0, str(SRC_DIR))
38
 
39
 
40
+ def configure_cpu_only_runtime(logger: logging.Logger | None = None):
41
+ """Force CPU-only execution before importing project modules."""
42
+ cleared = {}
43
+ for key, value in CPU_ONLY_ENV_OVERRIDES.items():
44
+ os.environ[key] = value
45
+ for key in CPU_ONLY_ENV_CLEAR_KEYS:
46
+ previous = os.environ.pop(key, None)
47
+ if previous is not None:
48
+ cleared[key] = previous
49
+ if logger is not None:
50
+ logger.info(
51
+ "Configured CPU-only runtime overrides=%s cleared=%s",
52
+ CPU_ONLY_ENV_OVERRIDES,
53
+ cleared,
54
+ )
55
+ return cleared
56
+
57
+
58
+ configure_cpu_only_runtime()
59
+
60
+
61
  def setup_logging() -> logging.Logger:
62
  """Configure structured logging for Spaces runtime."""
63
  level_name = "DEBUG"
 
147
 
148
 
149
  def main():
150
+ configure_cpu_only_runtime(LOGGER)
151
  from ui_layout import CSS, create_ui_blocks
152
 
153
  LOGGER.info("Starting Gradio real environment entrypoint: %s", __file__)
gradio-web/test/test_episode_builder_cpu_backend.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import types
5
+
6
+
7
+ class _FakeEnv:
8
+ pass
9
+
10
+
11
+ class _FakeDemonstrationWrapper:
12
+ last_env = None
13
+ last_kwargs = None
14
+
15
+ def __init__(self, env, **kwargs):
16
+ type(self).last_env = env
17
+ type(self).last_kwargs = kwargs
18
+ self.env = env
19
+
20
+
21
+ class _FakeFailAwareWrapper:
22
+ last_env = None
23
+
24
+ def __init__(self, env):
25
+ type(self).last_env = env
26
+ self.env = env
27
+
28
+
29
+ def test_builder_make_env_for_episode_forces_cpu_backends(monkeypatch, reload_module):
30
+ resolver = reload_module("robomme.env_record_wrapper.episode_config_resolver")
31
+ captured = {}
32
+
33
+ monkeypatch.setitem(
34
+ sys.modules,
35
+ "robomme.env_record_wrapper.DemonstrationWrapper",
36
+ types.SimpleNamespace(DemonstrationWrapper=_FakeDemonstrationWrapper),
37
+ )
38
+ monkeypatch.setitem(
39
+ sys.modules,
40
+ "robomme.env_record_wrapper.FailAwareWrapper",
41
+ types.SimpleNamespace(FailAwareWrapper=_FakeFailAwareWrapper),
42
+ )
43
+
44
+ def fake_make(env_id, **kwargs):
45
+ captured["env_id"] = env_id
46
+ captured["kwargs"] = kwargs
47
+ return _FakeEnv()
48
+
49
+ monkeypatch.setattr(resolver.gym, "make", fake_make)
50
+
51
+ builder = resolver.BenchmarkEnvBuilder(
52
+ env_id="BinFill",
53
+ dataset="train",
54
+ action_space="joint_angle",
55
+ gui_render=False,
56
+ )
57
+ monkeypatch.setattr(builder, "resolve_episode", lambda episode_idx: (123, "hard"))
58
+
59
+ env = builder.make_env_for_episode(7)
60
+
61
+ assert captured["env_id"] == "BinFill"
62
+ assert captured["kwargs"]["obs_mode"] == "rgb+depth+segmentation"
63
+ assert captured["kwargs"]["control_mode"] == "pd_joint_pos"
64
+ assert captured["kwargs"]["render_mode"] == "rgb_array"
65
+ assert captured["kwargs"]["reward_mode"] == "dense"
66
+ assert captured["kwargs"]["sim_backend"] == "physx_cpu"
67
+ assert captured["kwargs"]["render_backend"] == "sapien_cpu"
68
+ assert captured["kwargs"]["seed"] == 123
69
+ assert captured["kwargs"]["difficulty"] == "hard"
70
+ assert _FakeDemonstrationWrapper.last_kwargs["gui_render"] is False
71
+ assert _FakeFailAwareWrapper.last_env is env.env
gradio-web/test/test_main_launch_config.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  import sys
4
  import types
5
 
@@ -15,7 +16,14 @@ class _FakeDemo:
15
  return None
16
 
17
 
18
- def test_main_launch_passes_ui_css(monkeypatch, reload_module):
 
 
 
 
 
 
 
19
  main = reload_module("main")
20
  fake_demo = _FakeDemo()
21
  fake_ui_layout = types.SimpleNamespace(
@@ -25,6 +33,12 @@ def test_main_launch_passes_ui_css(monkeypatch, reload_module):
25
 
26
  monkeypatch.setitem(sys.modules, "ui_layout", fake_ui_layout)
27
  monkeypatch.setenv("PORT", "7861")
 
 
 
 
 
 
28
 
29
  main.main()
30
 
@@ -34,3 +48,9 @@ def test_main_launch_passes_ui_css(monkeypatch, reload_module):
34
  assert fake_demo.launch_kwargs["theme"] == fake_demo.theme
35
  assert fake_demo.launch_kwargs["css"] == fake_ui_layout.CSS
36
  assert fake_demo.launch_kwargs["head"] == fake_demo.head
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ import os
4
  import sys
5
  import types
6
 
 
16
  return None
17
 
18
 
19
+ def test_main_launch_passes_ui_css_and_forces_cpu_runtime(monkeypatch, reload_module):
20
+ monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")
21
+ monkeypatch.setenv("NVIDIA_VISIBLE_DEVICES", "all")
22
+ monkeypatch.setenv("SAPIEN_RENDER_DEVICE", "cuda")
23
+ monkeypatch.setenv("NVIDIA_DRIVER_CAPABILITIES", "compute,utility,graphics")
24
+ monkeypatch.setenv("VK_ICD_FILENAMES", "/tmp/nvidia_icd.json")
25
+ monkeypatch.setenv("MUJOCO_GL", "egl")
26
+
27
  main = reload_module("main")
28
  fake_demo = _FakeDemo()
29
  fake_ui_layout = types.SimpleNamespace(
 
33
 
34
  monkeypatch.setitem(sys.modules, "ui_layout", fake_ui_layout)
35
  monkeypatch.setenv("PORT", "7861")
36
+ monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "2")
37
+ monkeypatch.setenv("NVIDIA_VISIBLE_DEVICES", "all")
38
+ monkeypatch.setenv("SAPIEN_RENDER_DEVICE", "cuda")
39
+ monkeypatch.setenv("NVIDIA_DRIVER_CAPABILITIES", "graphics")
40
+ monkeypatch.setenv("VK_ICD_FILENAMES", "/tmp/another_nvidia_icd.json")
41
+ monkeypatch.setenv("MUJOCO_GL", "egl")
42
 
43
  main.main()
44
 
 
48
  assert fake_demo.launch_kwargs["theme"] == fake_demo.theme
49
  assert fake_demo.launch_kwargs["css"] == fake_ui_layout.CSS
50
  assert fake_demo.launch_kwargs["head"] == fake_demo.head
51
+ assert os.environ["CUDA_VISIBLE_DEVICES"] == "-1"
52
+ assert os.environ["NVIDIA_VISIBLE_DEVICES"] == "void"
53
+ assert os.environ["SAPIEN_RENDER_DEVICE"] == "cpu"
54
+ assert "NVIDIA_DRIVER_CAPABILITIES" not in os.environ
55
+ assert "VK_ICD_FILENAMES" not in os.environ
56
+ assert "MUJOCO_GL" not in os.environ
human_readme.md CHANGED
@@ -15,7 +15,7 @@ uv sync
15
  uv pip install -e .
16
  ```
17
 
18
- ## 🐳 Gradio Docker Deployment (HF Space + GPU)
19
 
20
  This repository also supports Docker deployment for the Gradio app entrypoint:
21
 
@@ -26,23 +26,23 @@ python3 gradio-web/main.py
26
  Build image:
27
 
28
  ```bash
29
- docker build -t robomme-gradio:gpu .
30
  ```
31
 
32
- Run container (GPU + Vulkan for ManiSkill/SAPIEN):
33
 
34
  ```bash
35
- docker run --rm --gpus all -p 7860:7860 robomme-gradio:gpu
36
  ```
37
 
38
- The image sets `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics` so the NVIDIA container runtime exposes Vulkan/graphics driver files inside the container. Without graphics capability, ManiSkill/SAPIEN may fail with `vk::createInstanceUnique: ErrorIncompatibleDriver`.
39
 
40
  Optional metadata override:
41
 
42
  ```bash
43
- docker run --rm --gpus all -p 7860:7860 \
44
  -e ROBOMME_METADATA_ROOT=/home/user/app/src/robomme/env_metadata/train \
45
- robomme-gradio:gpu
46
  ```
47
 
48
  Notes:
@@ -148,13 +148,16 @@ Want to add your model? Download the [dataset](https://huggingface.co/datasets/Y
148
 
149
  A1: Use a physical display or set up a virtual display for GUI rendering (e.g. install a VNC server and set the `DISPLAY` variable correctly).
150
 
151
- **Q2: Failure related to Vulkan installation.**
152
 
153
- A2: ManiSkill/SAPIEN requires both Vulkan userspace packages inside the container and NVIDIA graphics capability exposed by the container runtime. This image installs `libvulkan1`, `vulkan-tools`, and `libglvnd-dev`, and sets `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics`. If it still does not work, first verify the host machine itself supports Vulkan (`vulkaninfo` on the host), then switch to CPU rendering:
154
 
155
  ```python
 
 
156
  os.environ['SAPIEN_RENDER_DEVICE'] = 'cpu'
157
- os.environ['MUJOCO_GL'] = 'osmesa'
 
158
  ```
159
 
160
 
 
15
  uv pip install -e .
16
  ```
17
 
18
+ ## 🐳 Gradio Docker Deployment (HF Space CPU-only)
19
 
20
  This repository also supports Docker deployment for the Gradio app entrypoint:
21
 
 
26
  Build image:
27
 
28
  ```bash
29
+ docker build -t robomme-gradio:cpu .
30
  ```
31
 
32
+ Run container:
33
 
34
  ```bash
35
+ docker run --rm -p 7860:7860 robomme-gradio:cpu
36
  ```
37
 
38
+ The container forces CPU-only ManiSkill/SAPIEN backends and does not require NVIDIA runtime or `--gpus all`, which keeps it aligned with Hugging Face Docker Spaces CPU deployments.
39
 
40
  Optional metadata override:
41
 
42
  ```bash
43
+ docker run --rm -p 7860:7860 \
44
  -e ROBOMME_METADATA_ROOT=/home/user/app/src/robomme/env_metadata/train \
45
+ robomme-gradio:cpu
46
  ```
47
 
48
  Notes:
 
148
 
149
  A1: Use a physical display or set up a virtual display for GUI rendering (e.g. install a VNC server and set the `DISPLAY` variable correctly).
150
 
151
+ **Q2: Failure related to ManiSkill/SAPIEN rendering initialization.**
152
 
153
+ A2: This Docker image is configured for CPU-only execution and should not rely on NVIDIA runtime settings. If rendering still fails, first check that no external environment variables are forcing GPU paths, then keep the container on the CPU-only defaults:
154
 
155
  ```python
156
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
157
+ os.environ['NVIDIA_VISIBLE_DEVICES'] = 'void'
158
  os.environ['SAPIEN_RENDER_DEVICE'] = 'cpu'
159
+ os.environ.pop('VK_ICD_FILENAMES', None)
160
+ os.environ.pop('MUJOCO_GL', None)
161
  ```
162
 
163
 
src/robomme/env_record_wrapper/episode_config_resolver.py CHANGED
@@ -195,6 +195,8 @@ class BenchmarkEnvBuilder:
195
  control_mode="pd_joint_pos",
196
  render_mode=self.render_mode,
197
  reward_mode="dense",
 
 
198
  )
199
  if seed is not None:
200
  env_kwargs["seed"] = seed
 
195
  control_mode="pd_joint_pos",
196
  render_mode=self.render_mode,
197
  reward_mode="dense",
198
+ sim_backend="physx_cpu",
199
+ render_backend="sapien_cpu",
200
  )
201
  if seed is not None:
202
  env_kwargs["seed"] = seed