HongzeFu committed on
Commit
59a743a
·
1 Parent(s): 2ec0072

cpu docker v1

Browse files
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
 
@@ -11,10 +11,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
11
  git \
12
  ffmpeg \
13
  libgl1 \
14
- libglvnd-dev \
15
  libglib2.0-0 \
16
  libvulkan1 \
17
- vulkan-tools \
18
  && add-apt-repository ppa:deadsnakes/ppa \
19
  && apt-get update && apt-get install -y --no-install-recommends \
20
  python3.11 \
@@ -32,7 +30,6 @@ RUN useradd -m -u 1000 user
32
 
33
  ENV PYTHONUNBUFFERED=1 \
34
  PIP_NO_CACHE_DIR=1 \
35
- NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \
36
  HOME=/home/user \
37
  PATH=/home/user/.local/bin:$PATH \
38
  OMP_NUM_THREADS=1 \
 
1
+ FROM ubuntu:22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
 
 
11
  git \
12
  ffmpeg \
13
  libgl1 \
 
14
  libglib2.0-0 \
15
  libvulkan1 \
 
16
  && add-apt-repository ppa:deadsnakes/ppa \
17
  && apt-get update && apt-get install -y --no-install-recommends \
18
  python3.11 \
 
30
 
31
  ENV PYTHONUNBUFFERED=1 \
32
  PIP_NO_CACHE_DIR=1 \
 
33
  HOME=/home/user \
34
  PATH=/home/user/.local/bin:$PATH \
35
  OMP_NUM_THREADS=1 \
README.md CHANGED
@@ -14,9 +14,9 @@ python3 gradio-web/main.py
14
 
15
  `app_file` is intentionally not set here because this is a Docker Space; the application entrypoint comes from Docker `CMD`, while `app_port: 7860` is the external port published by the Space.
16
 
17
- Local GPU Docker run:
18
 
19
  ```bash
20
- docker build -t robomme-gradio:gpu .
21
- docker run --rm --gpus all -p 7860:7860 robomme-gradio:gpu
22
  ```
 
14
 
15
  `app_file` is intentionally not set here because this is a Docker Space; the application entrypoint comes from Docker `CMD`, while `app_port: 7860` is the external port published by the Space.
16
 
17
+ Local CPU Docker run:
18
 
19
  ```bash
20
+ docker build -t robomme-gradio:cpu .
21
+ docker run --rm -p 7860:7860 robomme-gradio:cpu
22
  ```
docker-entrypoint.sh CHANGED
@@ -1,50 +1,20 @@
1
  #!/bin/sh
2
  set -eu
3
 
4
- pick_vulkan_icd() {
5
- for candidate in \
6
- /etc/vulkan/icd.d/nvidia_icd.json \
7
- /etc/vulkan/icd.d/nvidia_icd.x86_64.json \
8
- /usr/share/vulkan/icd.d/nvidia_icd.json \
9
- /usr/share/vulkan/icd.d/nvidia_icd.x86_64.json
10
- do
11
- if [ -f "$candidate" ]; then
12
- printf '%s\n' "$candidate"
13
- return 0
14
- fi
15
- done
16
- return 1
17
- }
18
-
19
- run_diagnostic() {
20
- label="$1"
21
- shift
22
- echo "[entrypoint] $label"
23
- if "$@"; then
24
- return 0
25
- else
26
- status=$?
27
- fi
28
- echo "[entrypoint] $label failed with exit code $status"
29
- return 0
30
- }
31
-
32
  if [ -z "${OMP_NUM_THREADS:-}" ]; then
33
  export OMP_NUM_THREADS=1
34
  fi
35
 
36
- if [ -z "${VK_ICD_FILENAMES:-}" ]; then
37
- if detected_icd="$(pick_vulkan_icd)"; then
38
- export VK_ICD_FILENAMES="$detected_icd"
39
- echo "[entrypoint] Using Vulkan ICD: $VK_ICD_FILENAMES"
40
- else
41
- echo "[entrypoint] Vulkan ICD file not found under /etc or /usr/share"
42
- fi
43
- else
44
- echo "[entrypoint] Respecting preset VK_ICD_FILENAMES: $VK_ICD_FILENAMES"
45
- fi
46
 
 
47
  echo "[entrypoint] OMP_NUM_THREADS=$OMP_NUM_THREADS"
48
- run_diagnostic "nvidia-smi" nvidia-smi
49
- run_diagnostic "vulkaninfo --summary" vulkaninfo --summary
 
50
  exec "$@"
 
1
  #!/bin/sh
2
  set -eu
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  if [ -z "${OMP_NUM_THREADS:-}" ]; then
5
  export OMP_NUM_THREADS=1
6
  fi
7
 
8
+ export CUDA_VISIBLE_DEVICES=-1
9
+ export NVIDIA_VISIBLE_DEVICES=void
10
+ export SAPIEN_RENDER_DEVICE=cpu
11
+ unset NVIDIA_DRIVER_CAPABILITIES
12
+ unset VK_ICD_FILENAMES
13
+ unset MUJOCO_GL
 
 
 
 
14
 
15
+ echo "[entrypoint] Starting RoboMME Gradio app in CPU-only mode"
16
  echo "[entrypoint] OMP_NUM_THREADS=$OMP_NUM_THREADS"
17
+ echo "[entrypoint] CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
18
+ echo "[entrypoint] NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES"
19
+ echo "[entrypoint] SAPIEN_RENDER_DEVICE=$SAPIEN_RENDER_DEVICE"
20
  exec "$@"
gradio-web/main.py CHANGED
@@ -15,6 +15,16 @@ SRC_DIR = PROJECT_ROOT / "src"
15
  VIDEOS_DIR = APP_DIR / "videos"
16
  TEMP_DEMOS_DIR = PROJECT_ROOT / "temp_demos"
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
 
@@ -27,6 +37,27 @@ if str(SRC_DIR) not in sys.path:
27
  sys.path.insert(0, str(SRC_DIR))
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def setup_logging() -> logging.Logger:
31
  """Configure structured logging for Spaces runtime."""
32
  level_name = "DEBUG"
@@ -116,6 +147,7 @@ def build_allowed_paths():
116
 
117
 
118
  def main():
 
119
  from ui_layout import CSS, create_ui_blocks
120
 
121
  LOGGER.info("Starting Gradio real environment entrypoint: %s", __file__)
 
15
  VIDEOS_DIR = APP_DIR / "videos"
16
  TEMP_DEMOS_DIR = PROJECT_ROOT / "temp_demos"
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
18
+ CPU_ONLY_ENV_OVERRIDES = {
19
+ "CUDA_VISIBLE_DEVICES": "-1",
20
+ "NVIDIA_VISIBLE_DEVICES": "void",
21
+ "SAPIEN_RENDER_DEVICE": "cpu",
22
+ }
23
+ CPU_ONLY_ENV_CLEAR_KEYS = (
24
+ "NVIDIA_DRIVER_CAPABILITIES",
25
+ "VK_ICD_FILENAMES",
26
+ "MUJOCO_GL",
27
+ )
28
 
29
 
30
 
 
37
  sys.path.insert(0, str(SRC_DIR))
38
 
39
 
40
+ def configure_cpu_only_runtime(logger: logging.Logger | None = None):
41
+ """Force CPU-only execution before importing project modules."""
42
+ cleared = {}
43
+ for key, value in CPU_ONLY_ENV_OVERRIDES.items():
44
+ os.environ[key] = value
45
+ for key in CPU_ONLY_ENV_CLEAR_KEYS:
46
+ previous = os.environ.pop(key, None)
47
+ if previous is not None:
48
+ cleared[key] = previous
49
+ if logger is not None:
50
+ logger.info(
51
+ "Configured CPU-only runtime overrides=%s cleared=%s",
52
+ CPU_ONLY_ENV_OVERRIDES,
53
+ cleared,
54
+ )
55
+ return cleared
56
+
57
+
58
+ configure_cpu_only_runtime()
59
+
60
+
61
  def setup_logging() -> logging.Logger:
62
  """Configure structured logging for Spaces runtime."""
63
  level_name = "DEBUG"
 
147
 
148
 
149
  def main():
150
+ configure_cpu_only_runtime(LOGGER)
151
  from ui_layout import CSS, create_ui_blocks
152
 
153
  LOGGER.info("Starting Gradio real environment entrypoint: %s", __file__)
gradio-web/test/test_episode_builder_cpu_backend.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import types
5
+
6
+
7
+ class _FakeEnv:
8
+ pass
9
+
10
+
11
+ class _FakeDemonstrationWrapper:
12
+ last_env = None
13
+ last_kwargs = None
14
+
15
+ def __init__(self, env, **kwargs):
16
+ type(self).last_env = env
17
+ type(self).last_kwargs = kwargs
18
+ self.env = env
19
+
20
+
21
+ class _FakeFailAwareWrapper:
22
+ last_env = None
23
+
24
+ def __init__(self, env):
25
+ type(self).last_env = env
26
+ self.env = env
27
+
28
+
29
+ def test_builder_make_env_for_episode_forces_cpu_backends(monkeypatch, reload_module):
30
+ resolver = reload_module("robomme.env_record_wrapper.episode_config_resolver")
31
+ captured = {}
32
+
33
+ monkeypatch.setitem(
34
+ sys.modules,
35
+ "robomme.env_record_wrapper.DemonstrationWrapper",
36
+ types.SimpleNamespace(DemonstrationWrapper=_FakeDemonstrationWrapper),
37
+ )
38
+ monkeypatch.setitem(
39
+ sys.modules,
40
+ "robomme.env_record_wrapper.FailAwareWrapper",
41
+ types.SimpleNamespace(FailAwareWrapper=_FakeFailAwareWrapper),
42
+ )
43
+
44
+ def fake_make(env_id, **kwargs):
45
+ captured["env_id"] = env_id
46
+ captured["kwargs"] = kwargs
47
+ return _FakeEnv()
48
+
49
+ monkeypatch.setattr(resolver.gym, "make", fake_make)
50
+
51
+ builder = resolver.BenchmarkEnvBuilder(
52
+ env_id="BinFill",
53
+ dataset="train",
54
+ action_space="joint_angle",
55
+ gui_render=False,
56
+ )
57
+ monkeypatch.setattr(builder, "resolve_episode", lambda episode_idx: (123, "hard"))
58
+
59
+ env = builder.make_env_for_episode(7)
60
+
61
+ assert captured["env_id"] == "BinFill"
62
+ assert captured["kwargs"]["obs_mode"] == "rgb+depth+segmentation"
63
+ assert captured["kwargs"]["control_mode"] == "pd_joint_pos"
64
+ assert captured["kwargs"]["render_mode"] == "rgb_array"
65
+ assert captured["kwargs"]["reward_mode"] == "dense"
66
+ assert captured["kwargs"]["sim_backend"] == "physx_cpu"
67
+ assert captured["kwargs"]["render_backend"] == "sapien_cpu"
68
+ assert captured["kwargs"]["seed"] == 123
69
+ assert captured["kwargs"]["difficulty"] == "hard"
70
+ assert _FakeDemonstrationWrapper.last_kwargs["gui_render"] is False
71
+ assert _FakeFailAwareWrapper.last_env is env.env
gradio-web/test/test_main_launch_config.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  import sys
4
  import types
5
 
@@ -15,7 +16,14 @@ class _FakeDemo:
15
  return None
16
 
17
 
18
- def test_main_launch_passes_ui_css(monkeypatch, reload_module):
 
 
 
 
 
 
 
19
  main = reload_module("main")
20
  fake_demo = _FakeDemo()
21
  fake_ui_layout = types.SimpleNamespace(
@@ -25,6 +33,12 @@ def test_main_launch_passes_ui_css(monkeypatch, reload_module):
25
 
26
  monkeypatch.setitem(sys.modules, "ui_layout", fake_ui_layout)
27
  monkeypatch.setenv("PORT", "7861")
 
 
 
 
 
 
28
 
29
  main.main()
30
 
@@ -34,3 +48,9 @@ def test_main_launch_passes_ui_css(monkeypatch, reload_module):
34
  assert fake_demo.launch_kwargs["theme"] == fake_demo.theme
35
  assert fake_demo.launch_kwargs["css"] == fake_ui_layout.CSS
36
  assert fake_demo.launch_kwargs["head"] == fake_demo.head
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ import os
4
  import sys
5
  import types
6
 
 
16
  return None
17
 
18
 
19
+ def test_main_launch_passes_ui_css_and_forces_cpu_runtime(monkeypatch, reload_module):
20
+ monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")
21
+ monkeypatch.setenv("NVIDIA_VISIBLE_DEVICES", "all")
22
+ monkeypatch.setenv("SAPIEN_RENDER_DEVICE", "cuda")
23
+ monkeypatch.setenv("NVIDIA_DRIVER_CAPABILITIES", "compute,utility,graphics")
24
+ monkeypatch.setenv("VK_ICD_FILENAMES", "/tmp/nvidia_icd.json")
25
+ monkeypatch.setenv("MUJOCO_GL", "egl")
26
+
27
  main = reload_module("main")
28
  fake_demo = _FakeDemo()
29
  fake_ui_layout = types.SimpleNamespace(
 
33
 
34
  monkeypatch.setitem(sys.modules, "ui_layout", fake_ui_layout)
35
  monkeypatch.setenv("PORT", "7861")
36
+ monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "2")
37
+ monkeypatch.setenv("NVIDIA_VISIBLE_DEVICES", "all")
38
+ monkeypatch.setenv("SAPIEN_RENDER_DEVICE", "cuda")
39
+ monkeypatch.setenv("NVIDIA_DRIVER_CAPABILITIES", "graphics")
40
+ monkeypatch.setenv("VK_ICD_FILENAMES", "/tmp/another_nvidia_icd.json")
41
+ monkeypatch.setenv("MUJOCO_GL", "egl")
42
 
43
  main.main()
44
 
 
48
  assert fake_demo.launch_kwargs["theme"] == fake_demo.theme
49
  assert fake_demo.launch_kwargs["css"] == fake_ui_layout.CSS
50
  assert fake_demo.launch_kwargs["head"] == fake_demo.head
51
+ assert os.environ["CUDA_VISIBLE_DEVICES"] == "-1"
52
+ assert os.environ["NVIDIA_VISIBLE_DEVICES"] == "void"
53
+ assert os.environ["SAPIEN_RENDER_DEVICE"] == "cpu"
54
+ assert "NVIDIA_DRIVER_CAPABILITIES" not in os.environ
55
+ assert "VK_ICD_FILENAMES" not in os.environ
56
+ assert "MUJOCO_GL" not in os.environ
human_readme.md CHANGED
@@ -15,7 +15,7 @@ uv sync
15
  uv pip install -e .
16
  ```
17
 
18
- ## 🐳 Gradio Docker Deployment (HF Space + GPU)
19
 
20
  This repository also supports Docker deployment for the Gradio app entrypoint:
21
 
@@ -26,23 +26,23 @@ python3 gradio-web/main.py
26
  Build image:
27
 
28
  ```bash
29
- docker build -t robomme-gradio:gpu .
30
  ```
31
 
32
- Run container (GPU + Vulkan for ManiSkill/SAPIEN):
33
 
34
  ```bash
35
- docker run --rm --gpus all -p 7860:7860 robomme-gradio:gpu
36
  ```
37
 
38
- The image sets `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics` so the NVIDIA container runtime exposes Vulkan/graphics driver files inside the container. Without graphics capability, ManiSkill/SAPIEN may fail with `vk::createInstanceUnique: ErrorIncompatibleDriver`.
39
 
40
  Optional metadata override:
41
 
42
  ```bash
43
- docker run --rm --gpus all -p 7860:7860 \
44
  -e ROBOMME_METADATA_ROOT=/home/user/app/src/robomme/env_metadata/train \
45
- robomme-gradio:gpu
46
  ```
47
 
48
  Notes:
@@ -148,13 +148,16 @@ Want to add your model? Download the [dataset](https://huggingface.co/datasets/Y
148
 
149
  A1: Use a physical display or set up a virtual display for GUI rendering (e.g. install a VNC server and set the `DISPLAY` variable correctly).
150
 
151
- **Q2: Failure related to Vulkan installation.**
152
 
153
- A2: ManiSkill/SAPIEN requires both Vulkan userspace packages inside the container and NVIDIA graphics capability exposed by the container runtime. This image installs `libvulkan1`, `vulkan-tools`, and `libglvnd-dev`, and sets `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics`. If it still does not work, first verify the host machine itself supports Vulkan (`vulkaninfo` on the host), then switch to CPU rendering:
154
 
155
  ```python
 
 
156
  os.environ['SAPIEN_RENDER_DEVICE'] = 'cpu'
157
- os.environ['MUJOCO_GL'] = 'osmesa'
 
158
  ```
159
 
160
 
 
15
  uv pip install -e .
16
  ```
17
 
18
+ ## 🐳 Gradio Docker Deployment (HF Space CPU-only)
19
 
20
  This repository also supports Docker deployment for the Gradio app entrypoint:
21
 
 
26
  Build image:
27
 
28
  ```bash
29
+ docker build -t robomme-gradio:cpu .
30
  ```
31
 
32
+ Run container:
33
 
34
  ```bash
35
+ docker run --rm -p 7860:7860 robomme-gradio:cpu
36
  ```
37
 
38
+ The container forces CPU-only ManiSkill/SAPIEN backends and does not require NVIDIA runtime or `--gpus all`, which keeps it aligned with Hugging Face Docker Spaces CPU deployments.
39
 
40
  Optional metadata override:
41
 
42
  ```bash
43
+ docker run --rm -p 7860:7860 \
44
  -e ROBOMME_METADATA_ROOT=/home/user/app/src/robomme/env_metadata/train \
45
+ robomme-gradio:cpu
46
  ```
47
 
48
  Notes:
 
148
 
149
  A1: Use a physical display or set up a virtual display for GUI rendering (e.g. install a VNC server and set the `DISPLAY` variable correctly).
150
 
151
+ **Q2: Failure related to ManiSkill/SAPIEN rendering initialization.**
152
 
153
+ A2: This Docker image is configured for CPU-only execution and should not rely on NVIDIA runtime settings. If rendering still fails, first check that no external environment variables are forcing GPU paths, then keep the container on the CPU-only defaults:
154
 
155
  ```python
156
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
157
+ os.environ['NVIDIA_VISIBLE_DEVICES'] = 'void'
158
  os.environ['SAPIEN_RENDER_DEVICE'] = 'cpu'
159
+ os.environ.pop('VK_ICD_FILENAMES', None)
160
+ os.environ.pop('MUJOCO_GL', None)
161
  ```
162
 
163
 
src/robomme/env_record_wrapper/episode_config_resolver.py CHANGED
@@ -195,6 +195,8 @@ class BenchmarkEnvBuilder:
195
  control_mode="pd_joint_pos",
196
  render_mode=self.render_mode,
197
  reward_mode="dense",
 
 
198
  )
199
  if seed is not None:
200
  env_kwargs["seed"] = seed
 
195
  control_mode="pd_joint_pos",
196
  render_mode=self.render_mode,
197
  reward_mode="dense",
198
+ sim_backend="physx_cpu",
199
+ render_backend="sapien_cpu",
200
  )
201
  if seed is not None:
202
  env_kwargs["seed"] = seed