Zach Wentz commited on
Commit
d0ae716
Β·
1 Parent(s): 2ed0abb

πŸ€– Deploy atari_env environment - 2025-10-19 22:32:33

Browse files
Files changed (47) hide show
  1. .gitattributes +0 -35
  2. Dockerfile +49 -0
  3. README.md +44 -5
  4. src/core/__init__.py +19 -0
  5. src/core/__pycache__/__init__.cpython-311.pyc +0 -0
  6. src/core/__pycache__/__init__.cpython-313.pyc +0 -0
  7. src/core/__pycache__/http_env_client.cpython-311.pyc +0 -0
  8. src/core/__pycache__/types.cpython-311.pyc +0 -0
  9. src/core/containers/__init__.py +7 -0
  10. src/core/containers/__pycache__/__init__.cpython-311.pyc +0 -0
  11. src/core/containers/images/Dockerfile +46 -0
  12. src/core/containers/images/README.md +92 -0
  13. src/core/containers/runtime/__init__.py +15 -0
  14. src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc +0 -0
  15. src/core/containers/runtime/__pycache__/providers.cpython-311.pyc +0 -0
  16. src/core/containers/runtime/providers.py +289 -0
  17. src/core/containers/test_local_docker_provider.py +258 -0
  18. src/core/env_server/__init__.py +35 -0
  19. src/core/env_server/__pycache__/__init__.cpython-311.pyc +0 -0
  20. src/core/env_server/__pycache__/__init__.cpython-313.pyc +0 -0
  21. src/core/env_server/__pycache__/base_transforms.cpython-311.pyc +0 -0
  22. src/core/env_server/__pycache__/base_transforms.cpython-313.pyc +0 -0
  23. src/core/env_server/__pycache__/http_server.cpython-311.pyc +0 -0
  24. src/core/env_server/__pycache__/http_server.cpython-313.pyc +0 -0
  25. src/core/env_server/__pycache__/interfaces.cpython-311.pyc +0 -0
  26. src/core/env_server/__pycache__/interfaces.cpython-313.pyc +0 -0
  27. src/core/env_server/__pycache__/types.cpython-311.pyc +0 -0
  28. src/core/env_server/__pycache__/types.cpython-313.pyc +0 -0
  29. src/core/env_server/__pycache__/web_interface.cpython-311.pyc +0 -0
  30. src/core/env_server/base_transforms.py +29 -0
  31. src/core/env_server/http_server.py +231 -0
  32. src/core/env_server/interfaces.py +118 -0
  33. src/core/env_server/types.py +45 -0
  34. src/core/env_server/web_interface.py +764 -0
  35. src/core/http_env_client.py +175 -0
  36. src/core/tools/__init__.py +11 -0
  37. src/core/tools/local_python_executor.py +105 -0
  38. src/core/types.py +22 -0
  39. src/envs/atari_env/README.md +383 -0
  40. src/envs/atari_env/__init__.py +31 -0
  41. src/envs/atari_env/client.py +118 -0
  42. src/envs/atari_env/models.py +86 -0
  43. src/envs/atari_env/server/Dockerfile +43 -0
  44. src/envs/atari_env/server/__init__.py +15 -0
  45. src/envs/atari_env/server/app.py +73 -0
  46. src/envs/atari_env/server/atari_environment.py +245 -0
  47. src/envs/atari_env/test_atari_docker.sh +333 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build: First stage builds the base image
8
+ FROM python:3.11-slim as base-builder
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ curl \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Install Python dependencies that all environments need
16
+ RUN pip install --no-cache-dir \
17
+ fastapi>=0.104.0 \
18
+ "uvicorn[standard]>=0.24.0" \
19
+ requests>=2.25.0 \
20
+ wsproto>=1.0.0
21
+
22
+ # Set working directory
23
+ WORKDIR /app
24
+
25
+ # Default environment variables
26
+ ENV PYTHONPATH=/app/src
27
+ ENV PYTHONUNBUFFERED=1
28
+
29
+ # Second stage: Use the built base image and add environment-specific dependencies
30
+ FROM base-builder
31
+
32
+ # Install ALE-specific dependencies
33
+ RUN pip install --no-cache-dir \
34
+ gymnasium>=0.29.0 \
35
+ ale-py>=0.8.0 \
36
+ numpy>=1.24.0
37
+
38
+
39
+ # Copy only what's needed for this environment
40
+ COPY src/core/ /app/src/core/
41
+ COPY src/envs/atari_env/ /app/src/envs/atari_env/
42
+
43
+ # Health check
44
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
45
+ CMD curl -f http://localhost:8000/health || exit 1
46
+
47
+ # Run the FastAPI server
48
+ CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
49
+ ENV ENABLE_WEB_INTERFACE=true
README.md CHANGED
@@ -1,10 +1,49 @@
1
  ---
2
- title: Atari Env
3
- emoji: πŸ¦€
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Atari_env Environment Server
3
+ emoji: 🐳
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
  ---
11
 
12
+ # Atari_env Environment Server
13
+
14
+ FastAPI server for atari_env environment powered by Meta's OpenEnv.
15
+
16
+ ## About
17
+
18
+ This Space provides a containerized environment for atari_env interactions.
19
+ Built with FastAPI and OpenEnv framework.
20
+
21
+ ## Web Interface
22
+
23
+ This deployment includes an interactive web interface for exploring the environment:
24
+ - **HumanAgent Interface**: Interact with the environment using a web form
25
+ - **State Observer**: Real-time view of environment state and action history
26
+ - **Live Updates**: WebSocket-based real-time updates
27
+
28
+ Access the web interface at: `/web`
29
+
30
+ ## Atari Environment
31
+
32
+ Provides Atari 2600 games via the Arcade Learning Environment (ALE).
33
+
34
+ ### Usage
35
+ Send a POST request to `/step` with:
36
+ ```json
37
+ {
38
+ "action_id": 0,
39
+ "game_name": "pong"
40
+ }
41
+ ```
42
+
43
+ ## API Documentation
44
+
45
+ Visit `/docs` for interactive API documentation.
46
+
47
+ ## Health Check
48
+
49
+ The environment provides a health check endpoint at `/health`.
src/core/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core components for agentic environments."""
8
+
9
+ # Re-export main components from submodules for convenience
10
+ from .env_server import *
11
+ from .http_env_client import HTTPEnvClient
12
+ from .types import StepResult
13
+
14
+ # Note: MCP module doesn't export anything yet
15
+
16
+ __all__ = [
17
+ "HTTPEnvClient",
18
+ "StepResult",
19
+ ]
src/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (400 Bytes). View file
 
src/core/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (383 Bytes). View file
 
src/core/__pycache__/http_env_client.cpython-311.pyc ADDED
Binary file (7.68 kB). View file
 
src/core/__pycache__/types.cpython-311.pyc ADDED
Binary file (1.09 kB). View file
 
src/core/containers/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Container management for environment servers."""
src/core/containers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (206 Bytes). View file
 
src/core/containers/images/Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ #
8
+ # OpenEnv Base Image
9
+ #
10
+ # This is the standard base image for all OpenEnv environment servers.
11
+ # It includes the minimal dependencies needed to run HTTP environment servers.
12
+ #
13
+ # Build: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
14
+ # Tag: docker tag openenv-base:latest openenv-base:0.1.0
15
+ #
16
+
17
+ FROM python:3.11-slim
18
+
19
+ # Set metadata
20
+ LABEL maintainer="OpenEnv Team"
21
+ LABEL description="Base image for OpenEnv based environment servers"
22
+ LABEL version="0.1.0"
23
+
24
+ # Install system dependencies
25
+ RUN apt-get update && apt-get install -y --no-install-recommends \
26
+ curl \
27
+ && rm -rf /var/lib/apt/lists/*
28
+
29
+ # Install Python dependencies that all environments need
30
+ RUN pip install --no-cache-dir \
31
+ fastapi>=0.104.0 \
32
+ "uvicorn[standard]>=0.24.0" \
33
+ requests>=2.25.0 \
34
+ wsproto>=1.0.0
35
+
36
+ # Set working directory
37
+ WORKDIR /app
38
+
39
+ # Default environment variables
40
+ ENV PYTHONPATH=/app/src
41
+ ENV PYTHONUNBUFFERED=1
42
+
43
+ # Default expose port (can be overridden)
44
+ EXPOSE 8000
45
+
46
+ # Note: CMD should be specified in child Dockerfiles
src/core/containers/images/README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenEnv Base Image
2
+
3
+ Standard base image for all OpenEnv environment servers.
4
+
5
+ ## What's Included
6
+
7
+ | Layer | Size | Contents |
8
+ |-------|------|----------|
9
+ | python:3.11-slim | 200 MB | Base Python runtime |
10
+ | + Dependencies | 100 MB | FastAPI, uvicorn, requests |
11
+ | **Total** | **~300 MB** | Ready for environment servers |
12
+
13
+ ## Image Sizes
14
+
15
+ ```
16
+ openenv-base:latest 300 MB (python + fastapi + uvicorn)
17
+ ```
18
+ echo-env:latest 500 MB (python + fastapi + uvicorn + app)
19
+ coding-env:latest 520 MB (python + fastapi + uvicorn + app + tools)
20
+ another-env:latest 510 MB (python + fastapi + uvicorn + app)
21
+ ---
22
+ Total: 1.5 GB (with lots of duplication)
23
+ ```
24
+
25
+ ### With Base Images (βœ… Solution)
26
+ ```
27
+ openenv-base:latest 300 MB (python + fastapi + uvicorn)
28
+ echo-env:latest 50 MB (app only, uses base)
29
+ coding-env:latest 70 MB (app + tools, uses base)
30
+ another-env:latest 45 MB (app only, uses base)
31
+ ---
32
+ Total: 465 MB (base shared, minimal duplication)
33
+ ```
34
+
35
+ ## Building the Base Image
36
+
37
+ ```bash
38
+ # From project root
39
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
40
+ ```
41
+
42
+ ## Usage in Environment Dockerfiles
43
+
44
+ Each environment Dockerfile should start with:
45
+
46
+ ```dockerfile
47
+ FROM openenv-base:latest
48
+
49
+ # Copy only environment-specific files
50
+ COPY src/core/ /app/src/core/
51
+ COPY src/envs/my_env/ /app/src/envs/my_env/
52
+
53
+ # Run the server
54
+ CMD ["uvicorn", "envs.my_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
55
+ ```
56
+
57
+ ## Base Image Contents
58
+
59
+ - Python 3.11-slim
60
+ - FastAPI >= 0.104.0
61
+ - Uvicorn >= 0.24.0
62
+ - Requests >= 2.25.0
63
+ - curl (for health checks)
64
+
65
+ ## Example: Building Echo Environment
66
+
67
+ ```bash
68
+ # Step 1: Build base image (do this once)
69
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
70
+
71
+ # Step 2: Build echo environment (uses base)
72
+ docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
73
+
74
+ # Step 3: Run echo environment
75
+ docker run -p 8000:8000 echo-env:latest
76
+ ```
77
+
78
+ ## Updating the Base
79
+
80
+ When dependencies need updating:
81
+
82
+ 1. Update `src/core/containers/images/Dockerfile`
83
+ 2. Rebuild base image
84
+ 3. Rebuild all environment images (they'll use new base)
85
+
86
+ ```bash
87
+ # Update base
88
+ docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
89
+
90
+ # Rebuild environments (they automatically use new base)
91
+ docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
92
+ ```
src/core/containers/runtime/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Container runtime providers."""
8
+
9
+ from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
10
+
11
+ __all__ = [
12
+ "ContainerProvider",
13
+ "LocalDockerProvider",
14
+ "KubernetesProvider",
15
+ ]
src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (389 Bytes). View file
 
src/core/containers/runtime/__pycache__/providers.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
src/core/containers/runtime/providers.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Container provider abstractions for running environment servers.
9
+
10
+ This module provides a pluggable architecture for different container providers
11
+ (local Docker, Kubernetes, cloud providers, etc.) to be used with HTTPEnvClient.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from abc import ABC, abstractmethod
17
+ from typing import Any, Dict, Optional
18
+
19
+
20
+ class ContainerProvider(ABC):
21
+ """
22
+ Abstract base class for container providers.
23
+
24
+ Providers implement this interface to support different container platforms:
25
+ - LocalDockerProvider: Runs containers on local Docker daemon
26
+ - KubernetesProvider: Runs containers in Kubernetes cluster
27
+ - FargateProvider: Runs containers on AWS Fargate
28
+ - CloudRunProvider: Runs containers on Google Cloud Run
29
+
30
+ The provider manages a single container lifecycle and provides the base URL
31
+ for connecting to it.
32
+
33
+ Example:
34
+ >>> provider = LocalDockerProvider()
35
+ >>> base_url = provider.start_container("echo-env:latest")
36
+ >>> print(base_url) # http://localhost:8000
37
+ >>> # Use the environment via base_url
38
+ >>> provider.stop_container()
39
+ """
40
+
41
+ @abstractmethod
42
+ def start_container(
43
+ self,
44
+ image: str,
45
+ port: Optional[int] = None,
46
+ env_vars: Optional[Dict[str, str]] = None,
47
+ **kwargs: Any,
48
+ ) -> str:
49
+ """
50
+ Start a container from the specified image.
51
+
52
+ Args:
53
+ image: Container image name (e.g., "echo-env:latest")
54
+ port: Port to expose (if None, provider chooses)
55
+ env_vars: Environment variables to pass to container
56
+ **kwargs: Provider-specific options
57
+
58
+ Returns:
59
+ Base URL to connect to the container (e.g., "http://localhost:8000")
60
+
61
+ Raises:
62
+ RuntimeError: If container fails to start
63
+ """
64
+ pass
65
+
66
+ @abstractmethod
67
+ def stop_container(self) -> None:
68
+ """
69
+ Stop and remove the running container.
70
+
71
+ This cleans up the container that was started by start_container().
72
+ """
73
+ pass
74
+
75
+ @abstractmethod
76
+ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
77
+ """
78
+ Wait for the container to be ready to accept requests.
79
+
80
+ This typically polls the /health endpoint until it returns 200.
81
+
82
+ Args:
83
+ base_url: Base URL of the container
84
+ timeout_s: Maximum time to wait
85
+
86
+ Raises:
87
+ TimeoutError: If container doesn't become ready in time
88
+ """
89
+ pass
90
+
91
+
92
+ class LocalDockerProvider(ContainerProvider):
93
+ """
94
+ Container provider for local Docker daemon.
95
+
96
+ This provider runs containers on the local machine using Docker.
97
+ Useful for development and testing.
98
+
99
+ Example:
100
+ >>> provider = LocalDockerProvider()
101
+ >>> base_url = provider.start_container("echo-env:latest")
102
+ >>> # Container running on http://localhost:<random-port>
103
+ >>> provider.stop_container()
104
+ """
105
+
106
+ def __init__(self):
107
+ """Initialize the local Docker provider."""
108
+ self._container_id: Optional[str] = None
109
+ self._container_name: Optional[str] = None
110
+
111
+ # Check if Docker is available
112
+ import subprocess
113
+
114
+ try:
115
+ subprocess.run(
116
+ ["docker", "version"],
117
+ check=True,
118
+ capture_output=True,
119
+ timeout=5,
120
+ )
121
+ except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
122
+ raise RuntimeError(
123
+ "Docker is not available. Please install Docker Desktop or Docker Engine."
124
+ )
125
+
126
+ def start_container(
127
+ self,
128
+ image: str,
129
+ port: Optional[int] = None,
130
+ env_vars: Optional[Dict[str, str]] = None,
131
+ **kwargs: Any,
132
+ ) -> str:
133
+ """
134
+ Start a Docker container locally.
135
+
136
+ Args:
137
+ image: Docker image name
138
+ port: Port to expose (if None, finds available port)
139
+ env_vars: Environment variables for the container
140
+ **kwargs: Additional Docker run options
141
+
142
+ Returns:
143
+ Base URL to connect to the container
144
+ """
145
+ import subprocess
146
+ import time
147
+
148
+ # Find available port if not specified
149
+ if port is None:
150
+ port = self._find_available_port()
151
+
152
+ # Generate container name
153
+ self._container_name = self._generate_container_name(image)
154
+
155
+ # Build docker run command
156
+ cmd = [
157
+ "docker", "run",
158
+ "-d", # Detached
159
+ "--name", self._container_name,
160
+ "-p", f"{port}:8000", # Map port
161
+ ]
162
+
163
+ # Add environment variables
164
+ if env_vars:
165
+ for key, value in env_vars.items():
166
+ cmd.extend(["-e", f"{key}={value}"])
167
+
168
+ # Add image
169
+ cmd.append(image)
170
+
171
+ # Run container
172
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
173
+ self._container_id = result.stdout.strip()
174
+
175
+ # Wait a moment for container to start
176
+ time.sleep(1)
177
+
178
+ base_url = f"http://localhost:{port}"
179
+ return base_url
180
+
181
+ def stop_container(self) -> None:
182
+ """
183
+ Stop and remove the Docker container.
184
+ """
185
+ if self._container_id is None:
186
+ return
187
+
188
+ import subprocess
189
+
190
+ try:
191
+ # Stop container
192
+ subprocess.run(
193
+ ["docker", "stop", self._container_id],
194
+ capture_output=True,
195
+ check=True,
196
+ timeout=10,
197
+ )
198
+
199
+ # Remove container
200
+ subprocess.run(
201
+ ["docker", "rm", self._container_id],
202
+ capture_output=True,
203
+ check=True,
204
+ timeout=10,
205
+ )
206
+ except subprocess.CalledProcessError:
207
+ # Container might already be stopped/removed
208
+ pass
209
+ finally:
210
+ self._container_id = None
211
+ self._container_name = None
212
+
213
+ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
214
+ """
215
+ Wait for container to be ready by polling /health endpoint.
216
+
217
+ Args:
218
+ base_url: Base URL of the container
219
+ timeout_s: Maximum time to wait
220
+
221
+ Raises:
222
+ TimeoutError: If container doesn't become ready
223
+ """
224
+ import time
225
+ import requests
226
+
227
+ start_time = time.time()
228
+ health_url = f"{base_url}/health"
229
+
230
+ while time.time() - start_time < timeout_s:
231
+ try:
232
+ response = requests.get(health_url, timeout=2.0)
233
+ if response.status_code == 200:
234
+ return
235
+ except requests.RequestException:
236
+ pass
237
+
238
+ time.sleep(0.5)
239
+
240
+ raise TimeoutError(
241
+ f"Container at {base_url} did not become ready within {timeout_s}s"
242
+ )
243
+
244
+ def _find_available_port(self) -> int:
245
+ """
246
+ Find an available port on localhost.
247
+
248
+ Returns:
249
+ An available port number
250
+ """
251
+ import socket
252
+
253
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
254
+ s.bind(("", 0))
255
+ s.listen(1)
256
+ port = s.getsockname()[1]
257
+ return port
258
+
259
+ def _generate_container_name(self, image: str) -> str:
260
+ """
261
+ Generate a unique container name based on image name and timestamp.
262
+
263
+ Args:
264
+ image: Docker image name
265
+
266
+ Returns:
267
+ A unique container name
268
+ """
269
+ import time
270
+
271
+ clean_image = image.split("/")[-1].split(":")[0]
272
+ timestamp = int(time.time() * 1000)
273
+ return f"{clean_image}-{timestamp}"
274
+
275
+
276
+ class KubernetesProvider(ContainerProvider):
277
+ """
278
+ Container provider for Kubernetes clusters.
279
+
280
+ This provider creates pods in a Kubernetes cluster and exposes them
281
+ via services or port-forwarding.
282
+
283
+ Example:
284
+ >>> provider = KubernetesProvider(namespace="envtorch-dev")
285
+ >>> base_url = provider.start_container("echo-env:latest")
286
+ >>> # Pod running in k8s, accessible via service or port-forward
287
+ >>> provider.stop_container()
288
+ """
289
+ pass
src/core/containers/test_local_docker_provider.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ End-to-end test for LocalDockerProvider.
4
+
5
+ This script tests the complete flow:
6
+ 1. Start a container using LocalDockerProvider
7
+ 2. Wait for it to be ready
8
+ 3. Make HTTP requests to test the environment
9
+ 4. Clean up the container
10
+ """
11
+
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ # Add src to path
16
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
17
+
18
+ import requests
19
+
20
+ from core.containers.runtime import LocalDockerProvider
21
+
22
+ # TODO: Remove this test or make it a functional test sicne this will be tested in e2e test for echo env
23
+ def test_local_docker_provider():
24
+ """Test LocalDockerProvider end-to-end."""
25
+ print("=" * 60)
26
+ print("LocalDockerProvider End-to-End Test")
27
+ print("=" * 60)
28
+ print()
29
+
30
+ provider = None
31
+
32
+ try:
33
+ # Step 1: Create provider
34
+ print("Step 1: Creating LocalDockerProvider...")
35
+ provider = LocalDockerProvider()
36
+ print("βœ“ Provider created\n")
37
+
38
+ # Step 2: Start container
39
+ print("Step 2: Starting echo-env container...")
40
+ base_url = provider.start_container("echo-env:latest")
41
+ print(f"βœ“ Container started at: {base_url}")
42
+ if provider._container_id:
43
+ print(f" Container ID: {provider._container_id[:12]}...")
44
+ if provider._container_name:
45
+ print(f" Container name: {provider._container_name}\n")
46
+
47
+ # Step 3: Wait for ready
48
+ print("Step 3: Waiting for container to be ready...")
49
+ provider.wait_for_ready(base_url, timeout_s=30.0)
50
+ print("βœ“ Container is ready!\n")
51
+
52
+ # Step 4: Test health endpoint
53
+ print("Step 4: Testing /health endpoint...")
54
+ response = requests.get(f"{base_url}/health")
55
+ print(f" Status: {response.status_code}")
56
+ print(f" Response: {response.json()}")
57
+ assert response.status_code == 200
58
+ assert response.json()["status"] == "healthy"
59
+ print("βœ“ Health check passed\n")
60
+
61
+ # Step 5: Test reset endpoint
62
+ print("Step 5: Testing /reset endpoint...")
63
+ response = requests.post(
64
+ f"{base_url}/reset",
65
+ json={},
66
+ headers={"Content-Type": "application/json"},
67
+ )
68
+ print(f" Status: {response.status_code}")
69
+ data = response.json()
70
+ print(f" Message: {data['observation']['echoed_message']}")
71
+ print(f" Reward: {data['reward']}")
72
+ print(f" Done: {data['done']}")
73
+ assert response.status_code == 200
74
+ assert data["observation"]["echoed_message"] == "Echo environment ready!"
75
+ print("βœ“ Reset test passed\n")
76
+
77
+ # Step 6: Test step endpoint
78
+ print("Step 6: Testing /step endpoint...")
79
+ response = requests.post(
80
+ f"{base_url}/step",
81
+ json={"action": {"message": "Hello from LocalDockerProvider!"}},
82
+ headers={"Content-Type": "application/json"},
83
+ )
84
+ print(f" Status: {response.status_code}")
85
+ data = response.json()
86
+ print(f" Echoed: {data['observation']['echoed_message']}")
87
+ print(f" Length: {data['observation']['message_length']}")
88
+ print(f" Reward: {data['reward']}")
89
+ assert response.status_code == 200
90
+ assert data["observation"]["echoed_message"] == "Hello from LocalDockerProvider!"
91
+ assert data["observation"]["message_length"] == 31
92
+ print("βœ“ Step test passed\n")
93
+
94
+ # Step 7: Test state endpoint
95
+ print("Step 7: Testing /state endpoint...")
96
+ response = requests.get(f"{base_url}/state")
97
+ print(f" Status: {response.status_code}")
98
+ data = response.json()
99
+ print(f" Episode ID: {data['episode_id']}")
100
+ print(f" Step count: {data['step_count']}")
101
+ assert response.status_code == 200
102
+ assert data["step_count"] == 1 # One step from above
103
+ print("βœ“ State test passed\n")
104
+
105
+ # Step 8: Multiple steps
106
+ print("Step 8: Testing multiple steps...")
107
+ for i in range(3):
108
+ response = requests.post(
109
+ f"{base_url}/step",
110
+ json={"action": {"message": f"Message {i+1}"}},
111
+ headers={"Content-Type": "application/json"},
112
+ )
113
+ assert response.status_code == 200
114
+ print(f" Step {i+1}: βœ“")
115
+
116
+ # Check state updated
117
+ response = requests.get(f"{base_url}/state")
118
+ data = response.json()
119
+ assert data["step_count"] == 4 # 1 + 3 more steps
120
+ print(f" Final step count: {data['step_count']}")
121
+ print("βœ“ Multiple steps test passed\n")
122
+
123
+ print("=" * 60)
124
+ print("βœ“ All tests passed!")
125
+ print("=" * 60)
126
+ print()
127
+
128
+ return True
129
+
130
+ except Exception as e:
131
+ print(f"\n❌ Test failed: {e}")
132
+ import traceback
133
+ traceback.print_exc()
134
+ return False
135
+
136
+ finally:
137
+ # Step 9: Cleanup
138
+ if provider is not None:
139
+ print("\nStep 9: Cleaning up container...")
140
+ try:
141
+ provider.stop_container()
142
+ print("βœ“ Container stopped and removed\n")
143
+ except Exception as e:
144
+ print(f"⚠️ Cleanup warning: {e}\n")
145
+
146
+
147
+ def test_provider_with_custom_port():
148
+ """Test provider with custom port."""
149
+ print("=" * 60)
150
+ print("LocalDockerProvider with Custom Port Test")
151
+ print("=" * 60)
152
+ print()
153
+
154
+ provider = None
155
+
156
+ try:
157
+ provider = LocalDockerProvider()
158
+
159
+ print("Starting container on custom port 8123...")
160
+ base_url = provider.start_container("echo-env:latest", port=8123)
161
+ print(f"βœ“ Started at: {base_url}")
162
+ assert ":8123" in base_url
163
+
164
+ print("Waiting for ready...")
165
+ provider.wait_for_ready(base_url)
166
+ print("βœ“ Ready!")
167
+
168
+ print("Testing health...")
169
+ response = requests.get(f"{base_url}/health")
170
+ assert response.status_code == 200
171
+ print("βœ“ Health check passed")
172
+
173
+ print("\nβœ“ Custom port test passed!\n")
174
+ return True
175
+
176
+ except Exception as e:
177
+ print(f"\n❌ Test failed: {e}")
178
+ return False
179
+
180
+ finally:
181
+ if provider is not None:
182
+ provider.stop_container()
183
+ print("βœ“ Cleaned up\n")
184
+
185
+
186
+ def test_provider_with_env_vars():
187
+ """Test provider with environment variables."""
188
+ print("=" * 60)
189
+ print("LocalDockerProvider with Environment Variables Test")
190
+ print("=" * 60)
191
+ print()
192
+
193
+ provider = None
194
+
195
+ try:
196
+ provider = LocalDockerProvider()
197
+
198
+ print("Starting container with environment variables...")
199
+ base_url = provider.start_container(
200
+ "echo-env:latest",
201
+ env_vars={"DEBUG": "true", "LOG_LEVEL": "info"}
202
+ )
203
+ print(f"βœ“ Started at: {base_url}")
204
+
205
+ print("Waiting for ready...")
206
+ provider.wait_for_ready(base_url)
207
+ print("βœ“ Ready!")
208
+
209
+ print("Testing health...")
210
+ response = requests.get(f"{base_url}/health")
211
+ assert response.status_code == 200
212
+ print("βœ“ Health check passed")
213
+
214
+ print("\nβœ“ Environment variables test passed!\n")
215
+ return True
216
+
217
+ except Exception as e:
218
+ print(f"\n❌ Test failed: {e}")
219
+ return False
220
+
221
+ finally:
222
+ if provider is not None:
223
+ provider.stop_container()
224
+ print("βœ“ Cleaned up\n")
225
+
226
+
227
+ if __name__ == "__main__":
228
+ print()
229
+ print("🐳 LocalDockerProvider Test Suite")
230
+ print()
231
+
232
+ results = []
233
+
234
+ # Run basic test
235
+ results.append(("Basic End-to-End", test_local_docker_provider()))
236
+
237
+ # Run custom port test
238
+ results.append(("Custom Port", test_provider_with_custom_port()))
239
+
240
+ # Run environment variables test
241
+ results.append(("Environment Variables", test_provider_with_env_vars()))
242
+
243
+ # Summary
244
+ print("=" * 60)
245
+ print("Test Summary")
246
+ print("=" * 60)
247
+ for name, passed in results:
248
+ status = "βœ“ PASSED" if passed else "βœ— FAILED"
249
+ print(f"{name:25} {status}")
250
+ print("=" * 60)
251
+
252
+ all_passed = all(result for _, result in results)
253
+ if all_passed:
254
+ print("\nπŸŽ‰ All tests passed!")
255
+ exit(0)
256
+ else:
257
+ print("\n❌ Some tests failed")
258
+ exit(1)
src/core/env_server/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core environment interfaces and types."""
8
+
9
+ from .base_transforms import CompositeTransform, NullTransform
10
+ from .http_server import HTTPEnvServer, create_app, create_fastapi_app
11
+ from .interfaces import Environment, Message, ModelTokenizer, Transform
12
+ from .types import Action, Observation, State
13
+ from .web_interface import create_web_interface_app, WebInterfaceManager
14
+
15
+ __all__ = [
16
+ # Core interfaces
17
+ "Environment",
18
+ "Transform",
19
+ "Message",
20
+ "ModelTokenizer",
21
+ # Types
22
+ "Action",
23
+ "Observation",
24
+ "State",
25
+ # Base transforms
26
+ "CompositeTransform",
27
+ "NullTransform",
28
+ # HTTP Server
29
+ "HTTPEnvServer",
30
+ "create_app",
31
+ "create_fastapi_app",
32
+ # Web Interface
33
+ "create_web_interface_app",
34
+ "WebInterfaceManager",
35
+ ]
src/core/env_server/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (898 Bytes). View file
 
src/core/env_server/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (940 Bytes). View file
 
src/core/env_server/__pycache__/base_transforms.cpython-311.pyc ADDED
Binary file (1.67 kB). View file
 
src/core/env_server/__pycache__/base_transforms.cpython-313.pyc ADDED
Binary file (1.57 kB). View file
 
src/core/env_server/__pycache__/http_server.cpython-311.pyc ADDED
Binary file (9.2 kB). View file
 
src/core/env_server/__pycache__/http_server.cpython-313.pyc ADDED
Binary file (7.14 kB). View file
 
src/core/env_server/__pycache__/interfaces.cpython-311.pyc ADDED
Binary file (5.22 kB). View file
 
src/core/env_server/__pycache__/interfaces.cpython-313.pyc ADDED
Binary file (4.68 kB). View file
 
src/core/env_server/__pycache__/types.cpython-311.pyc ADDED
Binary file (2.39 kB). View file
 
src/core/env_server/__pycache__/types.cpython-313.pyc ADDED
Binary file (2.1 kB). View file
 
src/core/env_server/__pycache__/web_interface.cpython-311.pyc ADDED
Binary file (29.9 kB). View file
 
src/core/env_server/base_transforms.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Base transform implementations for composing environment-specific transforms."""
8
+
9
+ from .interfaces import Transform
10
+ from .types import Observation
11
+
12
+
13
+ class CompositeTransform(Transform):
14
+ """Combines multiple transforms into a single transform."""
15
+
16
+ def __init__(self, transforms: list[Transform]):
17
+ self.transforms = transforms
18
+
19
+ def __call__(self, observation: Observation) -> Observation:
20
+ for transform in self.transforms:
21
+ observation = transform(observation)
22
+ return observation
23
+
24
+
25
+ class NullTransform(Transform):
26
+ """Default transform that passes through unchanged."""
27
+
28
+ def __call__(self, observation: Observation) -> Observation:
29
+ return observation
src/core/env_server/http_server.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ HTTP server wrapper for Environment instances.
9
+
10
+ This module provides utilities to wrap any Environment subclass and expose it
11
+ over HTTP endpoints that HTTPEnvClient can consume.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ from dataclasses import asdict
18
+ from typing import Any, Dict, Type
19
+
20
+ from .interfaces import Environment
21
+ from .types import Action, Observation
22
+ from fastapi import Body, FastAPI
23
+
24
+ class HTTPEnvServer:
25
+ """
26
+ HTTP server wrapper for Environment instances.
27
+
28
+ This class wraps an Environment and exposes its reset(), step(), and state
29
+ methods as HTTP endpoints compatible with HTTPEnvClient.
30
+
31
+ The server expects:
32
+ - Action deserialization: Converts JSON dict to Action subclass
33
+ - Observation serialization: Converts Observation subclass to JSON dict
34
+
35
+ Example:
36
+ >>> from core.env_server import HTTPEnvServer
37
+ >>> from envs.coding_env.server import CodeExecutionEnvironment
38
+ >>>
39
+ >>> env = CodeExecutionEnvironment()
40
+ >>> server = HTTPEnvServer(env)
41
+ >>>
42
+ >>> # Register routes with FastAPI
43
+ >>> from fastapi import FastAPI
44
+ >>> app = FastAPI()
45
+ >>> server.register_routes(app)
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ env: Environment,
51
+ action_cls: Type[Action],
52
+ observation_cls: Type[Observation],
53
+ ):
54
+ """
55
+ Initialize HTTP server wrapper.
56
+
57
+ Args:
58
+ env: The Environment instance to wrap
59
+ action_cls: The Action subclass this environment expects
60
+ observation_cls: The Observation subclass this environment returns
61
+ """
62
+ self.env = env
63
+ self.action_cls = action_cls
64
+ self.observation_cls = observation_cls
65
+
66
+ def register_routes(self, app: Any) -> None:
67
+ """
68
+ Register HTTP routes on a FastAPI application.
69
+
70
+ Args:
71
+ app: FastAPI application instance
72
+ """
73
+
74
+ if not isinstance(app, FastAPI):
75
+ raise TypeError("app must be a FastAPI instance")
76
+
77
+ @app.post("/reset")
78
+ async def reset(request: Dict[str, Any] = Body(default={})) -> Dict[str, Any]:
79
+ """Reset endpoint - returns initial observation."""
80
+ # TODO: Handle seed, episode_id from request if provided
81
+ observation = self.env.reset()
82
+ return self._serialize_observation(observation)
83
+
84
+ @app.post("/step")
85
+ async def step(request: Dict[str, Any]) -> Dict[str, Any]:
86
+ """Step endpoint - executes action and returns observation."""
87
+ action_data = request.get("action", {})
88
+ # TODO: Handle timeout_s, request_id, episode_id from request if provided
89
+
90
+ # Deserialize action
91
+ action = self._deserialize_action(action_data)
92
+
93
+ # Execute step
94
+ observation = self.env.step(action)
95
+
96
+ # Return serialized observation
97
+ return self._serialize_observation(observation)
98
+
99
+ @app.get("/state")
100
+ async def get_state() -> Dict[str, Any]:
101
+ """State endpoint - returns current environment state."""
102
+ state = self.env.state
103
+ return asdict(state)
104
+
105
+ @app.get("/health")
106
+ async def health() -> Dict[str, str]:
107
+ """Health check endpoint."""
108
+ return {"status": "healthy"}
109
+
110
+
111
+ def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
112
+ """
113
+ Convert JSON dict to Action instance.
114
+
115
+ Args:
116
+ action_data: Dictionary containing action data
117
+
118
+ Returns:
119
+ Action instance
120
+
121
+ Note:
122
+ This is a simple implementation. Subclasses may need to override
123
+ for more complex deserialization logic.
124
+ """
125
+ # Remove metadata if present (it will be set via kw_only field)
126
+ metadata = action_data.pop("metadata", {})
127
+ action = self.action_cls(**action_data)
128
+ action.metadata = metadata
129
+ return action
130
+
131
+ def _serialize_observation(self, observation: Observation) -> Dict[str, Any]:
132
+ """
133
+ Convert Observation instance to JSON-compatible dict.
134
+
135
+ Args:
136
+ observation: Observation instance
137
+
138
+ Returns:
139
+ Dictionary compatible with HTTPEnvClient._parse_result()
140
+
141
+ The format matches what HTTPEnvClient expects:
142
+ {
143
+ "observation": {...}, # Observation fields
144
+ "reward": float | None,
145
+ "done": bool,
146
+ }
147
+ """
148
+ obs_dict = asdict(observation)
149
+
150
+ # Extract reward and done (these are part of StepResult on client side)
151
+ reward = obs_dict.pop("reward", None)
152
+ done = obs_dict.pop("done", False)
153
+ obs_dict.pop("metadata", None) # Remove metadata from observation
154
+
155
+ # Return in HTTPEnvClient expected format
156
+ return {
157
+ "observation": obs_dict,
158
+ "reward": reward,
159
+ "done": done,
160
+ }
161
+
162
+ def create_app(
163
+ env: Environment,
164
+ action_cls: Type[Action],
165
+ observation_cls: Type[Observation],
166
+ ) -> Any:
167
+ """
168
+ Create a FastAPI application with web interface enabled for Hugging Face deployments.
169
+
170
+ This function checks for the ENABLE_WEB_INTERFACE environment variable to determine
171
+ whether to enable the web interface.
172
+
173
+ Args:
174
+ env: The Environment instance to serve
175
+ action_cls: The Action subclass this environment expects
176
+ observation_cls: The Observation subclass this environment returns
177
+
178
+ Returns:
179
+ FastAPI application instance with or without web interface based on environment
180
+ """
181
+ # Check if web interface should be enabled
182
+ # This can be controlled via environment variable or build argument
183
+ enable_web = (
184
+ os.getenv("ENABLE_WEB_INTERFACE", "false").lower() in ("true", "1", "yes")
185
+ )
186
+
187
+ if enable_web:
188
+ # Import web interface only when needed
189
+ from .web_interface import create_web_interface_app
190
+ return create_web_interface_app(env, action_cls, observation_cls)
191
+ else:
192
+ # Use standard FastAPI app without web interface
193
+ return create_fastapi_app(env, action_cls, observation_cls)
194
+
195
+
196
+ def create_fastapi_app(
197
+ env: Environment,
198
+ action_cls: Type[Action],
199
+ observation_cls: Type[Observation],
200
+ ) -> Any:
201
+ """
202
+ Create a FastAPI application with routes for the given environment.
203
+
204
+ Args:
205
+ env: The Environment instance to serve
206
+ action_cls: The Action subclass this environment expects
207
+ observation_cls: The Observation subclass this environment returns
208
+
209
+ Returns:
210
+ FastAPI application instance with routes registered
211
+
212
+ Example:
213
+ >>> from envs.coding_env.server import CodeExecutionEnvironment
214
+ >>> from envs.coding_env.models import CodeAction, CodeObservation
215
+ >>>
216
+ >>> env = CodeExecutionEnvironment()
217
+ >>> app = create_fastapi_app(env, CodeAction, CodeObservation)
218
+ >>>
219
+ >>> # Run with: uvicorn module:app --host 0.0.0.0 --port 8000
220
+ """
221
+ try:
222
+ from fastapi import FastAPI
223
+ except ImportError:
224
+ raise ImportError(
225
+ "FastAPI is required. Install with: pip install fastapi uvicorn"
226
+ )
227
+
228
+ app = FastAPI(title="Environment HTTP Server")
229
+ server = HTTPEnvServer(env, action_cls, observation_cls)
230
+ server.register_routes(app)
231
+ return app
src/core/env_server/interfaces.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Any, Protocol, TypedDict
9
+
10
+ from .types import Action, Observation, State
11
+
12
+
13
+ class Message(TypedDict):
14
+ """A message in a conversation.
15
+
16
+ Compatible with Huggingface chat template format.
17
+ """
18
+
19
+ role: str
20
+ content: str
21
+
22
+
23
+ class ModelTokenizer(Protocol):
24
+ """Protocol for tokenizers that support chat templates.
25
+
26
+ This protocol defines the interface that tokenizers must implement
27
+ to work with chat-based environments. It's compatible with
28
+ Huggingface transformers tokenizers.
29
+ """
30
+
31
+ def apply_chat_template(
32
+ self,
33
+ conversation: list[Message],
34
+ tokenize: bool = True,
35
+ return_tensors: str | None = None,
36
+ **kwargs: Any,
37
+ ) -> Any:
38
+ """Apply a chat template to format and optionally tokenize a conversation.
39
+
40
+ Args:
41
+ conversation: List of message dictionaries with 'role' and 'content'
42
+ tokenize: Whether to tokenize the output
43
+ return_tensors: Format for returned tensors ('pt' for PyTorch)
44
+ **kwargs: Additional arguments
45
+
46
+ Returns:
47
+ Formatted and optionally tokenized conversation
48
+ """
49
+ ...
50
+
51
+ def decode(
52
+ self, token_ids: Any, skip_special_tokens: bool = False, **kwargs: Any
53
+ ) -> str:
54
+ """Decode token IDs back to text.
55
+
56
+ Args:
57
+ token_ids: Token IDs to decode
58
+ skip_special_tokens: Whether to skip special tokens in output
59
+ **kwargs: Additional arguments
60
+
61
+ Returns:
62
+ Decoded text string
63
+ """
64
+ ...
65
+
66
+
67
+ class Transform(ABC):
68
+ """Transform observations to add rewards, metrics, or other modifications.
69
+
70
+ Transforms follow the TorchRL pattern where they take an observation
71
+ and return a (potentially modified) observation. This allows for
72
+ flexible reward computation and observation augmentation.
73
+ """
74
+
75
+ @abstractmethod
76
+ def __call__(self, observation: Observation) -> Observation:
77
+ """Transform an observation.
78
+
79
+ Args:
80
+ observation: The input observation
81
+
82
+ Returns:
83
+ The transformed observation
84
+ """
85
+ pass
86
+
87
+
88
+ class Environment(ABC):
89
+ """Base class for all environment servers following Gym/Gymnasium API.
90
+
91
+ Args:
92
+ transform: Optional transform to apply to observations
93
+ """
94
+
95
+ def __init__(self, transform: Transform | None = None):
96
+ self.transform = transform
97
+
98
+ @abstractmethod
99
+ def reset(self) -> Observation:
100
+ """Reset the environment and return initial observation."""
101
+ pass
102
+
103
+ @abstractmethod
104
+ def step(self, action: Action) -> Observation:
105
+ """Take a step in the environment."""
106
+ pass
107
+
108
+ @property
109
+ @abstractmethod
110
+ def state(self) -> State:
111
+ """Get the current environment state."""
112
+ pass
113
+
114
+ def _apply_transform(self, observation: Observation) -> Observation:
115
+ """Apply transform if one is provided."""
116
+ if self.transform is not None:
117
+ return self.transform(observation)
118
+ return observation
src/core/env_server/types.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
+
11
+ # Type aliases
12
+ Scalar = Union[int, float, bool]
13
+
14
+
15
+ @dataclass(kw_only=True)
16
+ class Action:
17
+ """Base class for all environment actions."""
18
+
19
+ metadata: Dict[str, Any] = field(default_factory=dict)
20
+
21
+
22
+ @dataclass(kw_only=True)
23
+ class Observation:
24
+ """Base class for all environment observations."""
25
+
26
+ done: bool = False
27
+ reward: Union[bool, int, float, None] = None
28
+ metadata: Dict[str, Any] = field(default_factory=dict)
29
+
30
+
31
+ @dataclass
32
+ class State:
33
+ """Base class for environment state."""
34
+
35
+ episode_id: Optional[str] = None
36
+ step_count: int = 0
37
+
38
+
39
+ @dataclass
40
+ class CodeExecResult:
41
+ """Result of code execution containing stdout, stderr, and exit code."""
42
+
43
+ stdout: str
44
+ stderr: str
45
+ exit_code: int
src/core/env_server/web_interface.py ADDED
@@ -0,0 +1,764 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Web interface for OpenEnv environments.
9
+
10
+ This module provides a web-based interface for interacting with OpenEnv environments,
11
+ including a two-pane layout for HumanAgent interaction and state observation.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import time
18
+ from dataclasses import asdict, dataclass
19
+ from typing import Any, Dict, List, Optional, Type
20
+ from datetime import datetime
21
+
22
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
23
+ from fastapi.responses import HTMLResponse, FileResponse
24
+ from fastapi.staticfiles import StaticFiles
25
+ from pydantic import BaseModel
26
+
27
+ from .interfaces import Environment
28
+ from .types import Action, Observation, State
29
+
30
+
31
+ @dataclass
32
+ class ActionLog:
33
+ """Log entry for an action taken."""
34
+ timestamp: str
35
+ action: Dict[str, Any]
36
+ observation: Dict[str, Any]
37
+ reward: Optional[float]
38
+ done: bool
39
+ step_count: int
40
+
41
+
42
+ @dataclass
43
+ class EpisodeState:
44
+ """Current episode state for the web interface."""
45
+ episode_id: Optional[str]
46
+ step_count: int
47
+ current_observation: Optional[Dict[str, Any]]
48
+ action_logs: List[ActionLog]
49
+ is_reset: bool = True
50
+
51
+
52
+ class WebInterfaceManager:
53
+ """Manages the web interface for an environment."""
54
+
55
+ def __init__(
56
+ self,
57
+ env: Environment,
58
+ action_cls: Type[Action],
59
+ observation_cls: Type[Observation],
60
+ ):
61
+ self.env = env
62
+ self.action_cls = action_cls
63
+ self.observation_cls = observation_cls
64
+ self.episode_state = EpisodeState(
65
+ episode_id=None,
66
+ step_count=0,
67
+ current_observation=None,
68
+ action_logs=[]
69
+ )
70
+ self.connected_clients: List[WebSocket] = []
71
+
72
+ async def connect_websocket(self, websocket: WebSocket):
73
+ """Connect a new WebSocket client."""
74
+ await websocket.accept()
75
+ self.connected_clients.append(websocket)
76
+
77
+ # Send current state to the new client
78
+ await self._send_state_update()
79
+
80
+ async def disconnect_websocket(self, websocket: WebSocket):
81
+ """Disconnect a WebSocket client."""
82
+ if websocket in self.connected_clients:
83
+ self.connected_clients.remove(websocket)
84
+
85
+ async def _send_state_update(self):
86
+ """Send current state to all connected clients."""
87
+ if not self.connected_clients:
88
+ return
89
+
90
+ state_data = {
91
+ "type": "state_update",
92
+ "episode_state": asdict(self.episode_state)
93
+ }
94
+
95
+ # Send to all connected clients
96
+ disconnected_clients = []
97
+ for client in self.connected_clients:
98
+ try:
99
+ await client.send_text(json.dumps(state_data))
100
+ except:
101
+ disconnected_clients.append(client)
102
+
103
+ # Remove disconnected clients
104
+ for client in disconnected_clients:
105
+ self.connected_clients.remove(client)
106
+
107
+ async def reset_environment(self) -> Dict[str, Any]:
108
+ """Reset the environment and update state."""
109
+ observation = self.env.reset()
110
+ state = self.env.state
111
+
112
+ # Update episode state
113
+ self.episode_state.episode_id = state.episode_id
114
+ self.episode_state.step_count = 0
115
+ self.episode_state.current_observation = asdict(observation)
116
+ self.episode_state.action_logs = []
117
+ self.episode_state.is_reset = True
118
+
119
+ # Send state update
120
+ await self._send_state_update()
121
+
122
+ return {
123
+ "observation": asdict(observation),
124
+ "reward": observation.reward,
125
+ "done": observation.done,
126
+ }
127
+
128
+ async def step_environment(self, action_data: Dict[str, Any]) -> Dict[str, Any]:
129
+ """Execute a step in the environment and update state."""
130
+ # Deserialize action
131
+ action = self._deserialize_action(action_data)
132
+
133
+ # Execute step
134
+ observation = self.env.step(action)
135
+ state = self.env.state
136
+
137
+ # Create action log
138
+ action_log = ActionLog(
139
+ timestamp=datetime.now().isoformat(),
140
+ action=asdict(action),
141
+ observation=asdict(observation),
142
+ reward=observation.reward,
143
+ done=observation.done,
144
+ step_count=state.step_count
145
+ )
146
+
147
+ # Update episode state
148
+ self.episode_state.episode_id = state.episode_id
149
+ self.episode_state.step_count = state.step_count
150
+ self.episode_state.current_observation = asdict(observation)
151
+ self.episode_state.action_logs.append(action_log)
152
+ self.episode_state.is_reset = False
153
+
154
+ # Send state update
155
+ await self._send_state_update()
156
+
157
+ return {
158
+ "observation": asdict(observation),
159
+ "reward": observation.reward,
160
+ "done": observation.done,
161
+ }
162
+
163
+ def get_state(self) -> Dict[str, Any]:
164
+ """Get current environment state."""
165
+ state = self.env.state
166
+ return asdict(state)
167
+
168
+ def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
169
+ """Convert JSON dict to Action instance."""
170
+ metadata = action_data.pop("metadata", {})
171
+ action = self.action_cls(**action_data)
172
+ action.metadata = metadata
173
+ return action
174
+
175
+
176
+ def create_web_interface_app(
177
+ env: Environment,
178
+ action_cls: Type[Action],
179
+ observation_cls: Type[Observation],
180
+ ) -> FastAPI:
181
+ """
182
+ Create a FastAPI application with web interface for the given environment.
183
+
184
+ Args:
185
+ env: The Environment instance to serve
186
+ action_cls: The Action subclass this environment expects
187
+ observation_cls: The Observation subclass this environment returns
188
+
189
+ Returns:
190
+ FastAPI application instance with web interface
191
+ """
192
+ from .http_server import create_fastapi_app
193
+
194
+ # Create the base environment app
195
+ app = create_fastapi_app(env, action_cls, observation_cls)
196
+
197
+ # Create web interface manager
198
+ web_manager = WebInterfaceManager(env, action_cls, observation_cls)
199
+
200
+ # Add web interface routes
201
+ @app.get("/web", response_class=HTMLResponse)
202
+ async def web_interface():
203
+ """Serve the web interface."""
204
+ return get_web_interface_html(action_cls)
205
+
206
+ @app.websocket("/ws")
207
+ async def websocket_endpoint(websocket: WebSocket):
208
+ """WebSocket endpoint for real-time updates."""
209
+ await web_manager.connect_websocket(websocket)
210
+ try:
211
+ while True:
212
+ # Keep connection alive
213
+ await websocket.receive_text()
214
+ except WebSocketDisconnect:
215
+ await web_manager.disconnect_websocket(websocket)
216
+
217
+ @app.post("/web/reset")
218
+ async def web_reset():
219
+ """Reset endpoint for web interface."""
220
+ return await web_manager.reset_environment()
221
+
222
+ @app.post("/web/step")
223
+ async def web_step(request: Dict[str, Any]):
224
+ """Step endpoint for web interface."""
225
+ action_data = request.get("action", {})
226
+ return await web_manager.step_environment(action_data)
227
+
228
+ @app.get("/web/state")
229
+ async def web_state():
230
+ """State endpoint for web interface."""
231
+ return web_manager.get_state()
232
+
233
+ return app
234
+
235
+
236
+ def get_web_interface_html(action_cls: Type[Action]) -> str:
237
+ """Generate the HTML for the web interface."""
238
+
239
+ # Get action fields for dynamic form generation
240
+ action_fields = []
241
+ if hasattr(action_cls, '__dataclass_fields__'):
242
+ for field_name, field_info in action_cls.__dataclass_fields__.items():
243
+ if field_name != 'metadata':
244
+ field_type = field_info.type
245
+ if field_type == str:
246
+ input_type = "text"
247
+ elif field_type == int:
248
+ input_type = "number"
249
+ elif field_type == float:
250
+ input_type = "number"
251
+ elif field_type == bool:
252
+ input_type = "checkbox"
253
+ else:
254
+ input_type = "text"
255
+
256
+ action_fields.append({
257
+ 'name': field_name,
258
+ 'type': input_type,
259
+ 'required': field_info.default is field_info.default_factory
260
+ })
261
+
262
+ return f"""
263
+ <!DOCTYPE html>
264
+ <html lang="en">
265
+ <head>
266
+ <meta charset="UTF-8">
267
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
268
+ <title>OpenEnv Web Interface</title>
269
+ <style>
270
+ * {{
271
+ margin: 0;
272
+ padding: 0;
273
+ box-sizing: border-box;
274
+ }}
275
+
276
+ body {{
277
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
278
+ background-color: #f5f5f5;
279
+ height: 100vh;
280
+ overflow: hidden;
281
+ }}
282
+
283
+ .container {{
284
+ display: flex;
285
+ height: 100vh;
286
+ }}
287
+
288
+ .left-pane {{
289
+ width: 50%;
290
+ background: white;
291
+ border-right: 1px solid #e0e0e0;
292
+ display: flex;
293
+ flex-direction: column;
294
+ }}
295
+
296
+ .right-pane {{
297
+ width: 50%;
298
+ background: #fafafa;
299
+ display: flex;
300
+ flex-direction: column;
301
+ }}
302
+
303
+ .pane-header {{
304
+ padding: 20px;
305
+ border-bottom: 1px solid #e0e0e0;
306
+ background: #f8f9fa;
307
+ font-weight: 600;
308
+ font-size: 16px;
309
+ }}
310
+
311
+ .pane-content {{
312
+ flex: 1;
313
+ padding: 20px;
314
+ overflow-y: auto;
315
+ }}
316
+
317
+ .action-form {{
318
+ background: white;
319
+ border: 1px solid #e0e0e0;
320
+ border-radius: 8px;
321
+ padding: 20px;
322
+ margin-bottom: 20px;
323
+ }}
324
+
325
+ .form-group {{
326
+ margin-bottom: 15px;
327
+ }}
328
+
329
+ .form-group label {{
330
+ display: block;
331
+ margin-bottom: 5px;
332
+ font-weight: 500;
333
+ color: #333;
334
+ }}
335
+
336
+ .form-group input, .form-group textarea {{
337
+ width: 100%;
338
+ padding: 8px 12px;
339
+ border: 1px solid #ddd;
340
+ border-radius: 4px;
341
+ font-size: 14px;
342
+ }}
343
+
344
+ .form-group input:focus, .form-group textarea:focus {{
345
+ outline: none;
346
+ border-color: #007bff;
347
+ box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
348
+ }}
349
+
350
+ .btn {{
351
+ background: #007bff;
352
+ color: white;
353
+ border: none;
354
+ padding: 10px 20px;
355
+ border-radius: 4px;
356
+ cursor: pointer;
357
+ font-size: 14px;
358
+ margin-right: 10px;
359
+ margin-bottom: 10px;
360
+ }}
361
+
362
+ .btn:hover {{
363
+ background: #0056b3;
364
+ }}
365
+
366
+ .btn:disabled {{
367
+ background: #6c757d;
368
+ cursor: not-allowed;
369
+ }}
370
+
371
+ .btn-secondary {{
372
+ background: #6c757d;
373
+ }}
374
+
375
+ .btn-secondary:hover {{
376
+ background: #545b62;
377
+ }}
378
+
379
+ .state-display {{
380
+ background: white;
381
+ border: 1px solid #e0e0e0;
382
+ border-radius: 8px;
383
+ padding: 15px;
384
+ margin-bottom: 20px;
385
+ }}
386
+
387
+ .state-item {{
388
+ margin-bottom: 8px;
389
+ }}
390
+
391
+ .state-label {{
392
+ font-weight: 500;
393
+ color: #666;
394
+ }}
395
+
396
+ .state-value {{
397
+ color: #333;
398
+ font-family: monospace;
399
+ }}
400
+
401
+ .logs-container {{
402
+ background: white;
403
+ border: 1px solid #e0e0e0;
404
+ border-radius: 8px;
405
+ padding: 15px;
406
+ max-height: 400px;
407
+ overflow-y: auto;
408
+ }}
409
+
410
+ .log-entry {{
411
+ border-bottom: 1px solid #f0f0f0;
412
+ padding: 10px 0;
413
+ }}
414
+
415
+ .log-entry:last-child {{
416
+ border-bottom: none;
417
+ }}
418
+
419
+ .log-timestamp {{
420
+ font-size: 12px;
421
+ color: #666;
422
+ margin-bottom: 5px;
423
+ }}
424
+
425
+ .log-action {{
426
+ background: #e3f2fd;
427
+ padding: 8px;
428
+ border-radius: 4px;
429
+ margin-bottom: 5px;
430
+ font-family: monospace;
431
+ font-size: 12px;
432
+ }}
433
+
434
+ .log-observation {{
435
+ background: #f3e5f5;
436
+ padding: 8px;
437
+ border-radius: 4px;
438
+ font-family: monospace;
439
+ font-size: 12px;
440
+ }}
441
+
442
+ .log-reward {{
443
+ font-weight: 600;
444
+ color: #28a745;
445
+ }}
446
+
447
+ .log-done {{
448
+ font-weight: 600;
449
+ color: #dc3545;
450
+ }}
451
+
452
+ .status-indicator {{
453
+ display: inline-block;
454
+ width: 8px;
455
+ height: 8px;
456
+ border-radius: 50%;
457
+ margin-right: 8px;
458
+ }}
459
+
460
+ .status-connected {{
461
+ background: #28a745;
462
+ }}
463
+
464
+ .status-disconnected {{
465
+ background: #dc3545;
466
+ }}
467
+
468
+ .json-display {{
469
+ background: #f8f9fa;
470
+ border: 1px solid #e9ecef;
471
+ border-radius: 4px;
472
+ padding: 10px;
473
+ font-family: monospace;
474
+ font-size: 12px;
475
+ white-space: pre-wrap;
476
+ max-height: 200px;
477
+ overflow-y: auto;
478
+ }}
479
+ </style>
480
+ </head>
481
+ <body>
482
+ <div class="container">
483
+ <!-- Left Pane: HumanAgent Interface -->
484
+ <div class="left-pane">
485
+ <div class="pane-header">
486
+ <span class="status-indicator status-disconnected" id="connection-status"></span>
487
+ HumanAgent Interface
488
+ </div>
489
+ <div class="pane-content">
490
+ <!-- Action Form -->
491
+ <div class="action-form">
492
+ <h3>Take Action</h3>
493
+ <form id="action-form">
494
+ {_generate_action_form_fields(action_fields)}
495
+ <button type="submit" class="btn" id="step-btn">Step</button>
496
+ </form>
497
+ </div>
498
+
499
+ <!-- Control Buttons -->
500
+ <div style="margin-bottom: 20px;">
501
+ <button class="btn btn-secondary" id="reset-btn">Reset Environment</button>
502
+ <button class="btn btn-secondary" id="state-btn">Get State</button>
503
+ </div>
504
+
505
+ <!-- Current State Display -->
506
+ <div class="state-display">
507
+ <h3>Current State</h3>
508
+ <div id="current-state">
509
+ <div class="state-item">
510
+ <span class="state-label">Status:</span>
511
+ <span class="state-value" id="env-status">Not initialized</span>
512
+ </div>
513
+ <div class="state-item">
514
+ <span class="state-label">Episode ID:</span>
515
+ <span class="state-value" id="episode-id">-</span>
516
+ </div>
517
+ <div class="state-item">
518
+ <span class="state-label">Step Count:</span>
519
+ <span class="state-value" id="step-count">0</span>
520
+ </div>
521
+ </div>
522
+ </div>
523
+ </div>
524
+ </div>
525
+
526
+ <!-- Right Pane: State Observer -->
527
+ <div class="right-pane">
528
+ <div class="pane-header">
529
+ State Observer
530
+ </div>
531
+ <div class="pane-content">
532
+ <!-- Current Observation -->
533
+ <div class="state-display">
534
+ <h3>Current Observation</h3>
535
+ <div id="current-observation" class="json-display">
536
+ No observation yet
537
+ </div>
538
+ </div>
539
+
540
+ <!-- Action Logs -->
541
+ <div class="logs-container">
542
+ <h3>Action History</h3>
543
+ <div id="action-logs">
544
+ No actions taken yet
545
+ </div>
546
+ </div>
547
+ </div>
548
+ </div>
549
+ </div>
550
+
551
+ <script>
552
+ class OpenEnvWebInterface {{
553
+ constructor() {{
554
+ this.ws = null;
555
+ this.isConnected = false;
556
+ this.init();
557
+ }}
558
+
559
+ init() {{
560
+ this.connectWebSocket();
561
+ this.setupEventListeners();
562
+ }}
563
+
564
+ connectWebSocket() {{
565
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
566
+ const wsUrl = `${{protocol}}//${{window.location.host}}/ws`;
567
+
568
+ this.ws = new WebSocket(wsUrl);
569
+
570
+ this.ws.onopen = () => {{
571
+ this.isConnected = true;
572
+ this.updateConnectionStatus(true);
573
+ console.log('WebSocket connected');
574
+ }};
575
+
576
+ this.ws.onmessage = (event) => {{
577
+ const data = JSON.parse(event.data);
578
+ if (data.type === 'state_update') {{
579
+ this.updateUI(data.episode_state);
580
+ }}
581
+ }};
582
+
583
+ this.ws.onclose = () => {{
584
+ this.isConnected = false;
585
+ this.updateConnectionStatus(false);
586
+ console.log('WebSocket disconnected');
587
+ // Attempt to reconnect after 3 seconds
588
+ setTimeout(() => this.connectWebSocket(), 3000);
589
+ }};
590
+
591
+ this.ws.onerror = (error) => {{
592
+ console.error('WebSocket error:', error);
593
+ }};
594
+ }}
595
+
596
+ setupEventListeners() {{
597
+ // Action form submission
598
+ document.getElementById('action-form').addEventListener('submit', (e) => {{
599
+ e.preventDefault();
600
+ this.submitAction();
601
+ }});
602
+
603
+ // Reset button
604
+ document.getElementById('reset-btn').addEventListener('click', () => {{
605
+ this.resetEnvironment();
606
+ }});
607
+
608
+ // State button
609
+ document.getElementById('state-btn').addEventListener('click', () => {{
610
+ this.getState();
611
+ }});
612
+ }}
613
+
614
+ async submitAction() {{
615
+ const formData = new FormData(document.getElementById('action-form'));
616
+ const action = {{}};
617
+
618
+ // Collect form data
619
+ for (const [key, value] of formData.entries()) {{
620
+ if (value !== '') {{
621
+ action[key] = value;
622
+ }}
623
+ }}
624
+
625
+ try {{
626
+ const response = await fetch('/web/step', {{
627
+ method: 'POST',
628
+ headers: {{ 'Content-Type': 'application/json' }},
629
+ body: JSON.stringify({{ action }})
630
+ }});
631
+
632
+ if (!response.ok) {{
633
+ throw new Error(`HTTP error! status: ${{response.status}}`);
634
+ }}
635
+
636
+ const result = await response.json();
637
+ console.log('Step result:', result);
638
+ }} catch (error) {{
639
+ console.error('Error submitting action:', error);
640
+ alert('Error submitting action: ' + error.message);
641
+ }}
642
+ }}
643
+
644
+ async resetEnvironment() {{
645
+ try {{
646
+ const response = await fetch('/web/reset', {{
647
+ method: 'POST',
648
+ headers: {{ 'Content-Type': 'application/json' }}
649
+ }});
650
+
651
+ if (!response.ok) {{
652
+ throw new Error(`HTTP error! status: ${{response.status}}`);
653
+ }}
654
+
655
+ const result = await response.json();
656
+ console.log('Reset result:', result);
657
+ }} catch (error) {{
658
+ console.error('Error resetting environment:', error);
659
+ alert('Error resetting environment: ' + error.message);
660
+ }}
661
+ }}
662
+
663
+ async getState() {{
664
+ try {{
665
+ const response = await fetch('/web/state');
666
+ const state = await response.json();
667
+ console.log('Current state:', state);
668
+ alert('Current state: ' + JSON.stringify(state, null, 2));
669
+ }} catch (error) {{
670
+ console.error('Error getting state:', error);
671
+ alert('Error getting state: ' + error.message);
672
+ }}
673
+ }}
674
+
675
+ updateConnectionStatus(connected) {{
676
+ const indicator = document.getElementById('connection-status');
677
+ if (connected) {{
678
+ indicator.className = 'status-indicator status-connected';
679
+ }} else {{
680
+ indicator.className = 'status-indicator status-disconnected';
681
+ }}
682
+ }}
683
+
684
+ updateUI(episodeState) {{
685
+ // Update current state
686
+ document.getElementById('env-status').textContent =
687
+ episodeState.is_reset ? 'Reset' : 'Running';
688
+ document.getElementById('episode-id').textContent =
689
+ episodeState.episode_id || '-';
690
+ document.getElementById('step-count').textContent =
691
+ episodeState.step_count.toString();
692
+
693
+ // Update current observation
694
+ const observationDiv = document.getElementById('current-observation');
695
+ if (episodeState.current_observation) {{
696
+ observationDiv.textContent = JSON.stringify(
697
+ episodeState.current_observation, null, 2
698
+ );
699
+ }} else {{
700
+ observationDiv.textContent = 'No observation yet';
701
+ }}
702
+
703
+ // Update action logs
704
+ const logsDiv = document.getElementById('action-logs');
705
+ if (episodeState.action_logs.length === 0) {{
706
+ logsDiv.innerHTML = 'No actions taken yet';
707
+ }} else {{
708
+ logsDiv.innerHTML = episodeState.action_logs.map(log => `
709
+ <div class="log-entry">
710
+ <div class="log-timestamp">${{log.timestamp}} (Step ${{log.step_count}})</div>
711
+ <div class="log-action">Action: ${{JSON.stringify(log.action, null, 2)}}</div>
712
+ <div class="log-observation">Observation: ${{JSON.stringify(log.observation, null, 2)}}</div>
713
+ <div>
714
+ <span class="log-reward">Reward: ${{log.reward !== null ? log.reward : 'None'}}</span>
715
+ ${{log.done ? '<span class="log-done">DONE</span>' : ''}}
716
+ </div>
717
+ </div>
718
+ `).join('');
719
+ }}
720
+ }}
721
+ }}
722
+
723
+ // Initialize the web interface when the page loads
724
+ document.addEventListener('DOMContentLoaded', () => {{
725
+ new OpenEnvWebInterface();
726
+ }});
727
+ </script>
728
+ </body>
729
+ </html>
730
+ """.replace('{_generate_action_form_fields(action_fields)}', _generate_action_form_fields(action_fields))
731
+
732
+
733
+ def _generate_action_form_fields(action_fields: List[Dict[str, Any]]) -> str:
734
+ """Generate HTML form fields for action input."""
735
+ if not action_fields:
736
+ return '<p>No action fields available</p>'
737
+
738
+ fields_html = []
739
+ for field in action_fields:
740
+ if field['type'] == 'checkbox':
741
+ fields_html.append(f'''
742
+ <div class="form-group">
743
+ <label>
744
+ <input type="checkbox" name="{field['name']}" value="true">
745
+ {field['name']}
746
+ </label>
747
+ </div>
748
+ ''')
749
+ elif field['type'] == 'text' and 'message' in field['name'].lower():
750
+ fields_html.append(f'''
751
+ <div class="form-group">
752
+ <label for="{field['name']}">{field['name']}:</label>
753
+ <textarea name="{field['name']}" id="{field['name']}" rows="3" placeholder="Enter {field['name']}..."></textarea>
754
+ </div>
755
+ ''')
756
+ else:
757
+ fields_html.append(f'''
758
+ <div class="form-group">
759
+ <label for="{field['name']}">{field['name']}:</label>
760
+ <input type="{field['type']}" name="{field['name']}" id="{field['name']}" placeholder="Enter {field['name']}..." {"required" if field['required'] else ""}>
761
+ </div>
762
+ ''')
763
+
764
+ return '\n'.join(fields_html)
src/core/http_env_client.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/runner_env.py
3
+ Minimal HTTP-based environment client.
4
+ - Talks to a single env worker exposing: POST /reset, POST /step
5
+
6
+ Future hooks (commented below) for:
7
+ - episode_id, seed on reset
8
+ - request_id on step
9
+ - custom headers (auth/trace)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from abc import ABC, abstractmethod
15
+ from typing import TYPE_CHECKING, Any, Dict, Generic, Optional, Type, TypeVar
16
+ from .containers.runtime import LocalDockerProvider
17
+ import requests
18
+
19
+ from .types import StepResult
20
+
21
+ if TYPE_CHECKING:
22
+ from .containers.runtime import ContainerProvider
23
+
24
+ ActT = TypeVar("ActT")
25
+ ObsT = TypeVar("ObsT")
26
+ EnvClientT = TypeVar("EnvClientT", bound="HTTPEnvClient")
27
+
28
+
29
+ class HTTPEnvClient(ABC, Generic[ActT, ObsT]):
30
+ def __init__(
31
+ self,
32
+ base_url: str,
33
+ request_timeout_s: float = 15.0,
34
+ default_headers: Optional[Dict[str, str]] = None,
35
+ provider: Optional["ContainerProvider"] = None,
36
+ ):
37
+ self._base = base_url.rstrip("/")
38
+ self._timeout = float(request_timeout_s)
39
+ self._http = requests.Session()
40
+ self._headers = default_headers or {}
41
+ self._provider = provider
42
+
43
+ @classmethod
44
+ def from_docker_image(
45
+ cls: Type[EnvClientT],
46
+ image: str,
47
+ provider: Optional["ContainerProvider"] = None,
48
+ ) -> EnvClientT:
49
+ """
50
+ Create an environment client by spinning up a Docker container locally.
51
+
52
+ This is a development utility that:
53
+ 1. Starts a Docker container from the specified image
54
+ 2. Waits for the server to be ready
55
+ 3. Creates and returns a client instance connected to the container
56
+
57
+ Note: The container lifecycle management is left to the user or higher-level
58
+ orchestration. The container will keep running until manually stopped.
59
+
60
+ Args:
61
+ image: Docker image name to run (e.g., "echo-env:latest")
62
+ provider: Container provider to use (defaults to LocalDockerProvider)
63
+
64
+ Returns:
65
+ An instance of the client class connected to the running container
66
+
67
+ Example:
68
+ >>> from envs.coding_env.client import CodingEnv
69
+ >>> from envs.coding_env.models import CodeAction
70
+ >>>
71
+ >>> # Create environment from image
72
+ >>> env = CodingEnv.from_docker_image("coding-env:latest")
73
+ >>>
74
+ >>> # Use the environment
75
+ >>> result = env.reset()
76
+ >>> print(result.observation)
77
+ >>>
78
+ >>> step_result = env.step(CodeAction(code="print('hello')"))
79
+ >>> print(step_result.observation.stdout)
80
+ >>>
81
+ >>> # Cleanup (optional)
82
+ >>> env.close()
83
+ """
84
+
85
+ # Use default provider if none provided
86
+ if provider is None:
87
+ provider = LocalDockerProvider()
88
+
89
+ # 1. Start container
90
+ base_url = provider.start_container(image)
91
+
92
+ # 2. Wait for server to be ready
93
+ provider.wait_for_ready(base_url)
94
+
95
+ # 3. Create and return client instance with provider reference
96
+ return cls(base_url=base_url, provider=provider)
97
+
98
+ @abstractmethod
99
+ def _step_payload(self, action: ActT) -> dict:
100
+ """Convert an Action object to the JSON body expected by the env server."""
101
+ raise NotImplementedError
102
+
103
+ @abstractmethod
104
+ def _parse_result(self, payload: dict) -> StepResult[ObsT]:
105
+ """Convert a JSON response from the env server to StepResult[ObsT]."""
106
+ raise NotImplementedError
107
+
108
+ @abstractmethod
109
+ def _parse_state(self, payload: dict) -> Any:
110
+ """Convert a JSON response from the state endpoint to a State object."""
111
+ raise NotImplementedError
112
+
113
+ # ---------- Environment Server Interface Methods ----------
114
+ def reset(self) -> StepResult[ObsT]:
115
+ body: Dict[str, Any] = {}
116
+ # TODO: later:
117
+ # body["seed"] = seed
118
+ # body["episode_id"] = episode_id
119
+ r = self._http.post(
120
+ f"{self._base}/reset",
121
+ json=body,
122
+ headers=self._headers,
123
+ timeout=self._timeout,
124
+ )
125
+ r.raise_for_status()
126
+ return self._parse_result(r.json())
127
+
128
+ def step(self, action: ActT) -> StepResult[ObsT]:
129
+ body: Dict[str, Any] = {
130
+ "action": self._step_payload(action),
131
+ "timeout_s": int(self._timeout),
132
+ }
133
+ # TODO: later:
134
+ # body["request_id"] = str(uuid.uuid4())
135
+ # body["episode_id"] = current_episode_id
136
+ r = self._http.post(
137
+ f"{self._base}/step",
138
+ json=body,
139
+ headers=self._headers,
140
+ timeout=self._timeout,
141
+ )
142
+ r.raise_for_status()
143
+ return self._parse_result(r.json())
144
+
145
+ def state(self) -> Any:
146
+ """
147
+ Get the current environment state from the server.
148
+
149
+ Returns:
150
+ State object with environment state information (e.g., episode_id, step_count)
151
+
152
+ Example:
153
+ >>> client = EchoEnv.from_docker_image("echo-env:latest")
154
+ >>> result = client.reset()
155
+ >>> state = client.state()
156
+ >>> print(state.episode_id)
157
+ >>> print(state.step_count)
158
+ """
159
+ r = self._http.get(
160
+ f"{self._base}/state",
161
+ headers=self._headers,
162
+ timeout=self._timeout,
163
+ )
164
+ r.raise_for_status()
165
+ return self._parse_state(r.json())
166
+
167
+ def close(self) -> None:
168
+ """
169
+ Close the environment and clean up resources.
170
+
171
+ If this client was created via from_docker_image(), this will stop
172
+ and remove the associated container.
173
+ """
174
+ if self._provider is not None:
175
+ self._provider.stop_container()
src/core/tools/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Core tools for code execution and other utilities."""
8
+
9
+ from .local_python_executor import PyExecutor
10
+
11
+ __all__ = ["PyExecutor"]
src/core/tools/local_python_executor.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Local Python Executor.
9
+
10
+ This module provides functionality for executing Python code locally by wrapping
11
+ the smolagents LocalPythonExecutor.
12
+ """
13
+
14
+ from smolagents import LocalPythonExecutor
15
+
16
+ from core.env_server.types import CodeExecResult
17
+
18
+
19
+ class PyExecutor:
20
+ """
21
+ Wrapper around smolagents LocalPythonExecutor for executing Python code.
22
+
23
+ This class provides a simple interface to execute Python code in a subprocess
24
+ and capture the results including stdout, stderr, and exit code.
25
+
26
+ Args:
27
+ additional_imports: List of additional module imports to authorize.
28
+ For example: ["numpy", "pandas", "matplotlib"]
29
+ These will be added to the base authorized imports.
30
+
31
+ Example:
32
+ >>> # Basic usage with default imports
33
+ >>> executor = PyExecutor()
34
+ >>> result = executor.run("print('Hello, World!')")
35
+ >>> print(result.stdout) # "Hello, World!\n"
36
+ >>> print(result.exit_code) # 0
37
+ >>>
38
+ >>> # Usage with additional imports
39
+ >>> executor = PyExecutor(additional_imports=["numpy", "pandas"])
40
+ >>> result = executor.run("import numpy as np\\nprint(np.array([1, 2, 3]))")
41
+ >>> print(result.stdout) # "[1 2 3]\n"
42
+ """
43
+
44
+ def __init__(self, additional_imports: list[str] | None = None):
45
+ """
46
+ Initialize the PyExecutor with a LocalPythonExecutor instance.
47
+
48
+ Args:
49
+ additional_imports: List of additional module names to authorize for import.
50
+ Defaults to an empty list if not provided.
51
+ """
52
+ if additional_imports is None:
53
+ additional_imports = []
54
+ self._executor = LocalPythonExecutor(
55
+ additional_authorized_imports=additional_imports
56
+ )
57
+ # Initialize tools to make BASE_PYTHON_TOOLS available (including print)
58
+ self._executor.send_tools({})
59
+
60
+ def run(self, code: str) -> CodeExecResult:
61
+ """
62
+ Execute Python code and return the result.
63
+
64
+ Args:
65
+ code: Python code string to execute
66
+
67
+ Returns:
68
+ CodeExecResult containing stdout, stderr, and exit_code
69
+
70
+ Example:
71
+ >>> executor = PyExecutor()
72
+ >>> result = executor.run("x = 5 + 3\\nprint(x)")
73
+ >>> print(result.stdout) # "8\n"
74
+ >>> print(result.exit_code) # 0
75
+ >>>
76
+ >>> # Error handling
77
+ >>> result = executor.run("1 / 0")
78
+ >>> print(result.exit_code) # 1
79
+ >>> print(result.stderr) # Contains error message
80
+ """
81
+ try:
82
+ # Execute the code using LocalPythonExecutor
83
+ # LocalPythonExecutor returns a CodeOutput object with output, logs, is_final_answer
84
+ exec_result = self._executor(code)
85
+
86
+ # Extract the logs (which contain print outputs) as stdout
87
+ # The output field contains the return value of the code
88
+ stdout = exec_result.logs
89
+ stderr = ""
90
+ exit_code = 0 # Success
91
+
92
+ return CodeExecResult(
93
+ stdout=stdout,
94
+ stderr=stderr,
95
+ exit_code=exit_code,
96
+ )
97
+
98
+ except Exception as e:
99
+ # LocalPythonExecutor raises InterpreterError for various issues
100
+ # (syntax errors, forbidden operations, runtime errors, etc.)
101
+ return CodeExecResult(
102
+ stdout="",
103
+ stderr=str(e),
104
+ exit_code=1, # Non-zero indicates error
105
+ )
src/core/types.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Type definitions for EnvTorch
2
+ from dataclasses import dataclass
3
+ from typing import Any, Generic, Optional, TypeVar
4
+
5
+ # Generic type for observations
6
+ ObsT = TypeVar("ObsT") # TypeVar for typehinting in IDEs
7
+
8
+
9
+ @dataclass
10
+ class StepResult(Generic[ObsT]):
11
+ """
12
+ Represents the result of one environment step.
13
+
14
+ Attributes:
15
+ observation: The environment's observation after the action.
16
+ reward: Scalar reward for this step (optional).
17
+ done: Whether the episode is finished.
18
+ """
19
+
20
+ observation: ObsT
21
+ reward: Optional[float] = None
22
+ done: bool = False
src/envs/atari_env/README.md ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Atari Environment
2
+
3
+ Integration of Atari 2600 games with the OpenEnv framework via the Arcade Learning Environment (ALE). ALE provides access to 100+ classic Atari games for RL research.
4
+
5
+ ## Supported Games
6
+
7
+ ALE supports 100+ Atari 2600 games including:
8
+
9
+ ### Popular Games
10
+ - **Pong** - Classic two-player tennis
11
+ - **Breakout** - Break bricks with a ball
12
+ - **Space Invaders** - Shoot descending aliens
13
+ - **Pac-Man / Ms. Pac-Man** - Navigate mazes and eat pellets
14
+ - **Asteroids** - Destroy asteroids in space
15
+ - **Defender** - Side-scrolling space shooter
16
+ - **Centipede** - Shoot segmented centipede
17
+ - **Donkey Kong** - Jump over barrels to save princess
18
+ - **Frogger** - Cross road and river safely
19
+ - **Q*bert** - Jump on pyramid cubes
20
+
21
+ And many more! For a complete list, see [ALE documentation](https://ale.farama.org/environments/complete_list/).
22
+
23
+ ## Architecture
24
+
25
+ ```
26
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
27
+ β”‚ RL Training Code (Client) β”‚
28
+ β”‚ AtariEnv.step(action) β”‚
29
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
30
+ β”‚ HTTP
31
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
32
+ β”‚ FastAPI Server (Docker) β”‚
33
+ β”‚ AtariEnvironment β”‚
34
+ β”‚ β”œβ”€ Wraps ALEInterface β”‚
35
+ β”‚ β”œβ”€ Handles observations β”‚
36
+ β”‚ └─ Action execution β”‚
37
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
38
+ ```
39
+
40
+ ## Installation & Usage
41
+
42
+ ### Option 1: Local Development (without Docker)
43
+
44
+ **Requirements:**
45
+ - Python 3.11+
46
+ - ale-py installed: `pip install ale-py`
47
+
48
+ ```python
49
+ from envs.atari_env import AtariEnv, AtariAction
50
+
51
+ # Start local server manually
52
+ # python -m envs.atari_env.server.app
53
+
54
+ # Connect to local server
55
+ env = AtariEnv(base_url="http://localhost:8000")
56
+
57
+ # Reset environment
58
+ result = env.reset()
59
+ print(f"Screen shape: {result.observation.screen_shape}")
60
+ print(f"Legal actions: {result.observation.legal_actions}")
61
+ print(f"Lives: {result.observation.lives}")
62
+
63
+ # Take actions
64
+ for _ in range(10):
65
+ action_id = 2 # UP action
66
+ result = env.step(AtariAction(action_id=action_id, game_name="pong"))
67
+ print(f"Reward: {result.reward}, Done: {result.done}")
68
+ if result.done:
69
+ break
70
+
71
+ # Cleanup
72
+ env.close()
73
+ ```
74
+
75
+ ### Option 2: Docker (Recommended)
76
+
77
+ **Build Atari image:**
78
+
79
+ ```bash
80
+ cd OpenEnv
81
+
82
+ # Build the image
83
+ docker build \
84
+ -f src/envs/atari_env/server/Dockerfile \
85
+ -t atari-env:latest \
86
+ .
87
+ ```
88
+
89
+ **Run specific games:**
90
+
91
+ ```bash
92
+ # Pong (default)
93
+ docker run -p 8000:8000 atari-env:latest
94
+
95
+ # Breakout
96
+ docker run -p 8000:8000 -e ATARI_GAME=breakout atari-env:latest
97
+
98
+ # Space Invaders with grayscale observation
99
+ docker run -p 8000:8000 \
100
+ -e ATARI_GAME=space_invaders \
101
+ -e ATARI_OBS_TYPE=grayscale \
102
+ atari-env:latest
103
+
104
+ # Ms. Pac-Man with full action space
105
+ docker run -p 8000:8000 \
106
+ -e ATARI_GAME=ms_pacman \
107
+ -e ATARI_FULL_ACTION_SPACE=true \
108
+ atari-env:latest
109
+ ```
110
+
111
+ **Use with from_docker_image():**
112
+
113
+ ```python
114
+ from envs.atari_env import AtariEnv, AtariAction
115
+ import numpy as np
116
+
117
+ # Automatically starts container
118
+ env = AtariEnv.from_docker_image("atari-env:latest")
119
+
120
+ result = env.reset()
121
+ result = env.step(AtariAction(action_id=2)) # UP
122
+
123
+ # Reshape screen for visualization
124
+ screen = np.array(result.observation.screen).reshape(result.observation.screen_shape)
125
+ print(f"Screen shape: {screen.shape}") # (210, 160, 3) for RGB
126
+
127
+ env.close() # Stops container
128
+ ```
129
+
130
+ ## Observation Types
131
+
132
+ ### 1. RGB (Default)
133
+ - **Shape**: [210, 160, 3]
134
+ - **Description**: Full-color screen observation
135
+ - **Usage**: Most realistic, good for vision-based learning
136
+
137
+ ```python
138
+ docker run -p 8000:8000 -e ATARI_OBS_TYPE=rgb atari-env:latest
139
+ ```
140
+
141
+ ### 2. Grayscale
142
+ - **Shape**: [210, 160]
143
+ - **Description**: Grayscale screen observation
144
+ - **Usage**: Reduced dimensionality, faster processing
145
+
146
+ ```python
147
+ docker run -p 8000:8000 -e ATARI_OBS_TYPE=grayscale atari-env:latest
148
+ ```
149
+
150
+ ### 3. RAM
151
+ - **Shape**: [128]
152
+ - **Description**: Raw 128-byte Atari 2600 RAM contents
153
+ - **Usage**: Compact representation, useful for specific research
154
+
155
+ ```python
156
+ docker run -p 8000:8000 -e ATARI_OBS_TYPE=ram atari-env:latest
157
+ ```
158
+
159
+ ## Action Spaces
160
+
161
+ ### Minimal Action Set (Default)
162
+ Game-specific minimal actions (typically 4-9 actions).
163
+ - Pong: 6 actions (NOOP, FIRE, UP, DOWN, etc.)
164
+ - Breakout: 4 actions (NOOP, FIRE, LEFT, RIGHT)
165
+
166
+ ```python
167
+ docker run -p 8000:8000 -e ATARI_FULL_ACTION_SPACE=false atari-env:latest
168
+ ```
169
+
170
+ ### Full Action Set
171
+ All 18 possible Atari 2600 actions:
172
+ 0. NOOP
173
+ 1. FIRE
174
+ 2. UP
175
+ 3. RIGHT
176
+ 4. LEFT
177
+ 5. DOWN
178
+ 6. UPRIGHT
179
+ 7. UPLEFT
180
+ 8. DOWNRIGHT
181
+ 9. DOWNLEFT
182
+ 10. UPFIRE
183
+ 11. RIGHTFIRE
184
+ 12. LEFTFIRE
185
+ 13. DOWNFIRE
186
+ 14. UPRIGHTFIRE
187
+ 15. UPLEFTFIRE
188
+ 16. DOWNRIGHTFIRE
189
+ 17. DOWNLEFTFIRE
190
+
191
+ ```python
192
+ docker run -p 8000:8000 -e ATARI_FULL_ACTION_SPACE=true atari-env:latest
193
+ ```
194
+
195
+ ## Configuration
196
+
197
+ ### Environment Variables
198
+
199
+ - `ATARI_GAME`: Game name (default: "pong")
200
+ - `ATARI_OBS_TYPE`: Observation type - "rgb", "grayscale", "ram" (default: "rgb")
201
+ - `ATARI_FULL_ACTION_SPACE`: Use full action space - "true"/"false" (default: "false")
202
+ - `ATARI_MODE`: Game mode (optional, game-specific)
203
+ - `ATARI_DIFFICULTY`: Game difficulty (optional, game-specific)
204
+ - `ATARI_REPEAT_ACTION_PROB`: Sticky action probability 0.0-1.0 (default: "0.0")
205
+ - `ATARI_FRAMESKIP`: Frames to skip per action (default: "4")
206
+
207
+ ### Example: Breakout with Custom Settings
208
+
209
+ ```bash
210
+ docker run -p 8000:8000 \
211
+ -e ATARI_GAME=breakout \
212
+ -e ATARI_OBS_TYPE=grayscale \
213
+ -e ATARI_FULL_ACTION_SPACE=true \
214
+ -e ATARI_REPEAT_ACTION_PROB=0.25 \
215
+ -e ATARI_FRAMESKIP=4 \
216
+ atari-env:latest
217
+ ```
218
+
219
+ ## API Reference
220
+
221
+ ### AtariAction
222
+
223
+ ```python
224
+ @dataclass
225
+ class AtariAction(Action):
226
+ action_id: int # Action index to execute
227
+ game_name: str = "pong" # Game name
228
+ obs_type: str = "rgb" # Observation type
229
+ full_action_space: bool = False # Full or minimal action space
230
+ ```
231
+
232
+ ### AtariObservation
233
+
234
+ ```python
235
+ @dataclass
236
+ class AtariObservation(Observation):
237
+ screen: List[int] # Flattened screen pixels
238
+ screen_shape: List[int] # Original screen shape
239
+ legal_actions: List[int] # Legal action indices
240
+ lives: int # Lives remaining
241
+ episode_frame_number: int # Frame # in episode
242
+ frame_number: int # Total frame #
243
+ done: bool # Episode finished
244
+ reward: Optional[float] # Reward from last action
245
+ ```
246
+
247
+ ### AtariState
248
+
249
+ ```python
250
+ @dataclass
251
+ class AtariState(State):
252
+ episode_id: str # Unique episode ID
253
+ step_count: int # Number of steps
254
+ game_name: str # Game name
255
+ obs_type: str # Observation type
256
+ full_action_space: bool # Action space type
257
+ mode: Optional[int] # Game mode
258
+ difficulty: Optional[int] # Game difficulty
259
+ repeat_action_probability: float # Sticky action prob
260
+ frameskip: int # Frameskip setting
261
+ ```
262
+
263
+ ## Example Script
264
+
265
+ ```python
266
+ #!/usr/bin/env python3
267
+ """Example training loop with Atari environment."""
268
+
269
+ import numpy as np
270
+ from envs.atari_env import AtariEnv, AtariAction
271
+
272
+ # Start environment
273
+ env = AtariEnv.from_docker_image("atari-env:latest")
274
+
275
+ # Training loop
276
+ for episode in range(10):
277
+ result = env.reset()
278
+ episode_reward = 0
279
+ steps = 0
280
+
281
+ while not result.done:
282
+ # Random policy (replace with your RL agent)
283
+ action_id = np.random.choice(result.observation.legal_actions)
284
+
285
+ # Take action
286
+ result = env.step(AtariAction(action_id=action_id))
287
+
288
+ episode_reward += result.reward or 0
289
+ steps += 1
290
+
291
+ # Reshape screen for processing
292
+ screen = np.array(result.observation.screen).reshape(
293
+ result.observation.screen_shape
294
+ )
295
+
296
+ # Your RL training code here
297
+ # ...
298
+
299
+ print(f"Episode {episode}: reward={episode_reward:.2f}, steps={steps}")
300
+
301
+ env.close()
302
+ ```
303
+
304
+ ## Testing
305
+
306
+ ### Local Testing
307
+
308
+ ```bash
309
+ # Install dependencies
310
+ pip install ale-py fastapi uvicorn requests
311
+
312
+ # Start server
313
+ cd /Users/sanyambhutani/OpenEnv/OpenEnv
314
+ export PYTHONPATH=/Users/sanyambhutani/OpenEnv/OpenEnv/src
315
+ python -m envs.atari_env.server.app
316
+
317
+ # Test from another terminal
318
+ python -c "
319
+ from envs.atari_env import AtariEnv, AtariAction
320
+ env = AtariEnv(base_url='http://localhost:8000')
321
+ result = env.reset()
322
+ print(f'Initial obs: {result.observation.screen_shape}')
323
+ result = env.step(AtariAction(action_id=2))
324
+ print(f'After step: reward={result.reward}, done={result.done}')
325
+ env.close()
326
+ "
327
+ ```
328
+
329
+ ### Docker Testing
330
+
331
+ ```bash
332
+ # Build and run
333
+ docker build -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
334
+ docker run -p 8000:8000 atari-env:latest
335
+
336
+ # Test in another terminal
337
+ curl http://localhost:8000/health
338
+ curl -X POST http://localhost:8000/reset
339
+ ```
340
+
341
+ ## Popular Games and Their Characteristics
342
+
343
+ | Game | Minimal Actions | Lives | Difficulty | Notes |
344
+ |------|----------------|-------|-----------|-------|
345
+ | Pong | 6 | 1 | Low | Good for learning basics |
346
+ | Breakout | 4 | 5 | Medium | Classic RL benchmark |
347
+ | Space Invaders | 6 | 3 | Medium | Shooting game |
348
+ | Ms. Pac-Man | 9 | 3 | High | Complex navigation |
349
+ | Asteroids | 14 | 3 | Medium | Continuous shooting |
350
+ | Montezuma's Revenge | 18 | 5 | Very High | Exploration challenge |
351
+ | Pitfall | 18 | 1 | High | Platformer |
352
+ | Seaquest | 18 | 3 | High | Submarine rescue |
353
+
354
+ ## Limitations & Notes
355
+
356
+ - **Frame perfect timing**: Some games require precise timing
357
+ - **Exploration**: Games like Montezuma's Revenge are notoriously difficult
358
+ - **Observation delay**: HTTP adds minimal latency vs local gym
359
+ - **Determinism**: Set `ATARI_REPEAT_ACTION_PROB=0.0` for deterministic behavior
360
+ - **ROMs**: All ROMs are bundled with ale-py package
361
+
362
+ ## References
363
+
364
+ - [Arcade Learning Environment Paper (2013)](https://jair.org/index.php/jair/article/view/10819)
365
+ - [ALE GitHub](https://github.com/Farama-Foundation/Arcade-Learning-Environment)
366
+ - [ALE Documentation](https://ale.farama.org/)
367
+ - [Gymnasium Atari Environments](https://gymnasium.farama.org/environments/atari/)
368
+
369
+ ## Citation
370
+
371
+ If you use ALE in your research, please cite:
372
+
373
+ ```bibtex
374
+ @Article{bellemare13arcade,
375
+ author = {{Bellemare}, M.~G. and {Naddaf}, Y. and {Veness}, J. and {Bowling}, M.},
376
+ title = {The Arcade Learning Environment: An Evaluation Platform for General Agents},
377
+ journal = {Journal of Artificial Intelligence Research},
378
+ year = "2013",
379
+ month = "jun",
380
+ volume = "47",
381
+ pages = "253--279",
382
+ }
383
+ ```
src/envs/atari_env/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Atari Environment for OpenEnv.
9
+
10
+ This module provides OpenEnv integration for Atari 2600 games via the
11
+ Arcade Learning Environment (ALE).
12
+
13
+ Example:
14
+ >>> from envs.atari_env import AtariEnv, AtariAction
15
+ >>>
16
+ >>> # Connect to a running server or start via Docker
17
+ >>> env = AtariEnv.from_docker_image("atari-env:latest")
18
+ >>>
19
+ >>> # Reset and interact
20
+ >>> result = env.reset()
21
+ >>> result = env.step(AtariAction(action_id=2)) # UP
22
+ >>> print(result.reward, result.done)
23
+ >>>
24
+ >>> # Cleanup
25
+ >>> env.close()
26
+ """
27
+
28
+ from .client import AtariEnv
29
+ from .models import AtariAction, AtariObservation, AtariState
30
+
31
+ __all__ = ["AtariEnv", "AtariAction", "AtariObservation", "AtariState"]
src/envs/atari_env/client.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Atari Environment HTTP Client.
9
+
10
+ This module provides the client for connecting to an Atari Environment server
11
+ over HTTP.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Dict, TYPE_CHECKING
17
+
18
+ from core.http_env_client import HTTPEnvClient
19
+ from core.types import StepResult
20
+
21
+ from .models import AtariAction, AtariObservation, AtariState
22
+
23
+ if TYPE_CHECKING:
24
+ from core.containers.runtime import ContainerProvider
25
+
26
+
27
+ class AtariEnv(HTTPEnvClient[AtariAction, AtariObservation]):
28
+ """
29
+ HTTP client for Atari Environment.
30
+
31
+ This client connects to an AtariEnvironment HTTP server and provides
32
+ methods to interact with it: reset(), step(), and state access.
33
+
34
+ Example:
35
+ >>> # Connect to a running server
36
+ >>> client = AtariEnv(base_url="http://localhost:8000")
37
+ >>> result = client.reset()
38
+ >>> print(result.observation.screen_shape)
39
+ >>>
40
+ >>> # Take an action
41
+ >>> result = client.step(AtariAction(action_id=2)) # UP
42
+ >>> print(result.reward, result.done)
43
+
44
+ Example with Docker:
45
+ >>> # Automatically start container and connect
46
+ >>> client = AtariEnv.from_docker_image("atari-env:latest")
47
+ >>> result = client.reset()
48
+ >>> result = client.step(AtariAction(action_id=0)) # NOOP
49
+ """
50
+
51
+ def _step_payload(self, action: AtariAction) -> Dict[str, Any]:
52
+ """
53
+ Convert AtariAction to JSON payload for step request.
54
+
55
+ Args:
56
+ action: AtariAction instance.
57
+
58
+ Returns:
59
+ Dictionary representation suitable for JSON encoding.
60
+ """
61
+ return {
62
+ "action_id": action.action_id,
63
+ "game_name": action.game_name,
64
+ "obs_type": action.obs_type,
65
+ "full_action_space": action.full_action_space,
66
+ }
67
+
68
+ def _parse_result(self, payload: Dict[str, Any]) -> StepResult[AtariObservation]:
69
+ """
70
+ Parse server response into StepResult[AtariObservation].
71
+
72
+ Args:
73
+ payload: JSON response from server.
74
+
75
+ Returns:
76
+ StepResult with AtariObservation.
77
+ """
78
+ obs_data = payload.get("observation", {})
79
+
80
+ observation = AtariObservation(
81
+ screen=obs_data.get("screen", []),
82
+ screen_shape=obs_data.get("screen_shape", []),
83
+ legal_actions=obs_data.get("legal_actions", []),
84
+ lives=obs_data.get("lives", 0),
85
+ episode_frame_number=obs_data.get("episode_frame_number", 0),
86
+ frame_number=obs_data.get("frame_number", 0),
87
+ done=payload.get("done", False),
88
+ reward=payload.get("reward"),
89
+ metadata=obs_data.get("metadata", {}),
90
+ )
91
+
92
+ return StepResult(
93
+ observation=observation,
94
+ reward=payload.get("reward"),
95
+ done=payload.get("done", False),
96
+ )
97
+
98
+ def _parse_state(self, payload: Dict[str, Any]) -> AtariState:
99
+ """
100
+ Parse server response into AtariState object.
101
+
102
+ Args:
103
+ payload: JSON response from /state endpoint.
104
+
105
+ Returns:
106
+ AtariState object with environment state information.
107
+ """
108
+ return AtariState(
109
+ episode_id=payload.get("episode_id"),
110
+ step_count=payload.get("step_count", 0),
111
+ game_name=payload.get("game_name", "unknown"),
112
+ obs_type=payload.get("obs_type", "rgb"),
113
+ full_action_space=payload.get("full_action_space", False),
114
+ mode=payload.get("mode"),
115
+ difficulty=payload.get("difficulty"),
116
+ repeat_action_probability=payload.get("repeat_action_probability", 0.0),
117
+ frameskip=payload.get("frameskip", 4),
118
+ )
src/envs/atari_env/models.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for Atari Environment.
9
+
10
+ This module defines the Action, Observation, and State types for Atari games
11
+ via the Arcade Learning Environment (ALE).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ from typing import Any, Dict, List, Literal, Optional
18
+
19
+ from core.env_server import Action, Observation, State
20
+
21
+
22
+ @dataclass
23
+ class AtariAction(Action):
24
+ """
25
+ Action for Atari environments.
26
+
27
+ Attributes:
28
+ action_id: The integer action ID to take (from legal_actions).
29
+ game_name: Name of the Atari game (e.g., "pong", "breakout", "space_invaders").
30
+ obs_type: Observation type ("rgb", "grayscale", or "ram").
31
+ full_action_space: Whether to use full (18 actions) or minimal action space.
32
+ """
33
+ action_id: int
34
+ game_name: str = "pong"
35
+ obs_type: Literal["rgb", "grayscale", "ram"] = "rgb"
36
+ full_action_space: bool = False
37
+
38
+
39
+ @dataclass
40
+ class AtariObservation(Observation):
41
+ """
42
+ Observation from Atari environment.
43
+
44
+ This represents what the agent sees after taking an action.
45
+
46
+ Attributes:
47
+ screen: Screen observation as a flattened list of pixels.
48
+ Shape depends on obs_type:
49
+ - rgb: [210, 160, 3] flattened
50
+ - grayscale: [210, 160] flattened
51
+ - ram: [128] (RAM contents)
52
+ screen_shape: Original shape of the screen before flattening.
53
+ legal_actions: List of legal action IDs the agent can take.
54
+ lives: Number of lives remaining.
55
+ episode_frame_number: Frame number within current episode.
56
+ frame_number: Total frame number since environment creation.
57
+ """
58
+ screen: List[int]
59
+ screen_shape: List[int]
60
+ legal_actions: List[int]
61
+ lives: int = 0
62
+ episode_frame_number: int = 0
63
+ frame_number: int = 0
64
+
65
+
66
+ @dataclass
67
+ class AtariState(State):
68
+ """
69
+ State for Atari environment.
70
+
71
+ Attributes:
72
+ game_name: Name of the Atari game.
73
+ obs_type: Observation type ("rgb", "grayscale", or "ram").
74
+ full_action_space: Whether using full or minimal action space.
75
+ mode: Game mode (if applicable).
76
+ difficulty: Game difficulty (if applicable).
77
+ repeat_action_probability: Probability of repeating previous action (sticky actions).
78
+ frameskip: Number of frames to skip per action.
79
+ """
80
+ game_name: str = "pong"
81
+ obs_type: Literal["rgb", "grayscale", "ram"] = "rgb"
82
+ full_action_space: bool = False
83
+ mode: Optional[int] = None
84
+ difficulty: Optional[int] = None
85
+ repeat_action_probability: float = 0.0
86
+ frameskip: int = 4
src/envs/atari_env/server/Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile for Atari Environment
2
+ # This image provides Atari 2600 games via the Arcade Learning Environment (ALE)
3
+
4
+ # Configurable base image - defaults to local build, can be overridden for CI/CD
5
+ # Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src
6
+ #
7
+ # Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile .
8
+ # docker build -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
9
+ #
10
+ # CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \
11
+ # -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
12
+ ARG BASE_IMAGE=envtorch-base:latest
13
+ FROM ${BASE_IMAGE}
14
+
15
+ # Install ALE-specific dependencies
16
+ # ale-py includes all Atari ROMs by default and requires gymnasium
17
+ RUN pip install --no-cache-dir \
18
+ gymnasium>=0.29.0 \
19
+ ale-py>=0.8.0 \
20
+ numpy>=1.24.0
21
+
22
+ # Copy OpenEnv core (base image already set WORKDIR=/app)
23
+ COPY src/core/ /app/src/core/
24
+
25
+ # Copy Atari environment code
26
+ COPY src/envs/atari_env/ /app/src/envs/atari_env/
27
+
28
+ # Atari-specific environment variables (can be overridden at runtime)
29
+ ENV ATARI_GAME=pong
30
+ ENV ATARI_OBS_TYPE=rgb
31
+ ENV ATARI_FULL_ACTION_SPACE=false
32
+ ENV ATARI_REPEAT_ACTION_PROB=0.0
33
+ ENV ATARI_FRAMESKIP=4
34
+
35
+ # Expose port
36
+ EXPOSE 8000
37
+
38
+ # Health check
39
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
40
+ CMD curl -f http://localhost:8000/health || exit 1
41
+
42
+ # Run the FastAPI server
43
+ CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
src/envs/atari_env/server/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Atari Environment Server.
9
+
10
+ Server-side implementation of Atari environment for OpenEnv.
11
+ """
12
+
13
+ from .atari_environment import AtariEnvironment
14
+
15
+ __all__ = ["AtariEnvironment"]
src/envs/atari_env/server/app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Atari Environment.
9
+
10
+ This module creates an HTTP server that exposes Atari games
11
+ over HTTP endpoints, making them compatible with HTTPEnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn envs.atari_env.server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn envs.atari_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ python -m envs.atari_env.server.app
22
+
23
+ Environment variables:
24
+ ATARI_GAME: Game name to serve (default: "pong")
25
+ ATARI_OBS_TYPE: Observation type (default: "rgb")
26
+ ATARI_FULL_ACTION_SPACE: Use full action space (default: "false")
27
+ ATARI_MODE: Game mode (optional)
28
+ ATARI_DIFFICULTY: Game difficulty (optional)
29
+ ATARI_REPEAT_ACTION_PROB: Sticky action probability (default: "0.0")
30
+ ATARI_FRAMESKIP: Frameskip (default: "4")
31
+ """
32
+
33
+ import os
34
+
35
+ from core.env_server import create_app
36
+
37
+ from ..models import AtariAction, AtariObservation
38
+ from .atari_environment import AtariEnvironment
39
+
40
+ # Get configuration from environment variables
41
+ game_name = os.getenv("ATARI_GAME", "pong")
42
+ obs_type = os.getenv("ATARI_OBS_TYPE", "rgb")
43
+ full_action_space = os.getenv("ATARI_FULL_ACTION_SPACE", "false").lower() == "true"
44
+ repeat_action_prob = float(os.getenv("ATARI_REPEAT_ACTION_PROB", "0.0"))
45
+ frameskip = int(os.getenv("ATARI_FRAMESKIP", "4"))
46
+
47
+ # Optional parameters
48
+ mode = os.getenv("ATARI_MODE")
49
+ difficulty = os.getenv("ATARI_DIFFICULTY")
50
+
51
+ # Convert to int if specified
52
+ mode = int(mode) if mode is not None else None
53
+ difficulty = int(difficulty) if difficulty is not None else None
54
+
55
+ # Create the environment instance
56
+ env = AtariEnvironment(
57
+ game_name=game_name,
58
+ obs_type=obs_type,
59
+ full_action_space=full_action_space,
60
+ mode=mode,
61
+ difficulty=difficulty,
62
+ repeat_action_probability=repeat_action_prob,
63
+ frameskip=frameskip,
64
+ )
65
+
66
+ # Create the FastAPI app with routes
67
+ app = create_app(env, AtariAction, AtariObservation)
68
+
69
+
70
+ if __name__ == "__main__":
71
+ import uvicorn
72
+
73
+ uvicorn.run(app, host="0.0.0.0", port=8000)
src/envs/atari_env/server/atari_environment.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Atari Environment Server Implementation.
9
+
10
+ This module wraps ALE's ALEInterface and exposes it
11
+ via the OpenEnv Environment interface.
12
+ """
13
+
14
+ import uuid
15
+ from typing import Any, Dict, Literal, Optional
16
+
17
+ from core.env_server import Action, Environment, Observation
18
+
19
+ from ..models import AtariAction, AtariObservation, AtariState
20
+
21
+ # Import ALE
22
+ try:
23
+ from ale_py import ALEInterface, roms
24
+ import numpy as np
25
+ except ImportError as e:
26
+ raise ImportError(
27
+ "ALE (Arcade Learning Environment) is not installed. "
28
+ "Please install it with: pip install ale-py"
29
+ ) from e
30
+
31
+
32
+ class AtariEnvironment(Environment):
33
+ """
34
+ Atari Environment wrapper for OpenEnv.
35
+
36
+ This environment wraps Atari 2600 games via the Arcade Learning Environment (ALE)
37
+ and provides a clean interface for RL training.
38
+
39
+ Supported games include: pong, breakout, space_invaders, and 100+ others.
40
+
41
+ Args:
42
+ game_name: Name of the Atari game (e.g., "pong", "breakout").
43
+ obs_type: Observation type - "rgb", "grayscale", or "ram".
44
+ full_action_space: Use full action space (18 actions) vs minimal.
45
+ mode: Game mode (if applicable).
46
+ difficulty: Game difficulty (if applicable).
47
+ repeat_action_probability: Sticky action probability (default 0.0).
48
+ frameskip: Number of frames to skip per action (default 4).
49
+
50
+ Example:
51
+ >>> env = AtariEnvironment("pong")
52
+ >>> obs = env.reset()
53
+ >>> print(obs.screen_shape) # [210, 160, 3]
54
+ >>> obs = env.step(AtariAction(action_id=2)) # UP
55
+ >>> print(obs.reward, obs.done)
56
+ """
57
+
58
+ def __init__(
59
+ self,
60
+ game_name: str = "pong",
61
+ obs_type: Literal["rgb", "grayscale", "ram"] = "rgb",
62
+ full_action_space: bool = False,
63
+ mode: Optional[int] = None,
64
+ difficulty: Optional[int] = None,
65
+ repeat_action_probability: float = 0.0,
66
+ frameskip: int = 4,
67
+ ):
68
+ """Initialize Atari environment."""
69
+ super().__init__()
70
+
71
+ self.game_name = game_name
72
+ self.obs_type = obs_type
73
+ self.full_action_space = full_action_space
74
+ self.mode = mode
75
+ self.difficulty = difficulty
76
+ self.repeat_action_probability = repeat_action_probability
77
+ self.frameskip = frameskip
78
+
79
+ # Create ALE interface
80
+ self.ale = ALEInterface()
81
+
82
+ # Configure ALE
83
+ from ale_py import LoggerMode
84
+ self.ale.setLoggerMode(LoggerMode.Error) # Error mode only
85
+ self.ale.setFloat("repeat_action_probability", repeat_action_probability)
86
+
87
+ # Load ROM
88
+ try:
89
+ rom_path = roms.get_rom_path(game_name)
90
+ self.ale.loadROM(rom_path)
91
+ except Exception as e:
92
+ raise ValueError(
93
+ f"Failed to load Atari game '{game_name}': {e}\n"
94
+ f"Available games can be found via: ale_py.roms.list_roms()"
95
+ ) from e
96
+
97
+ # Set mode and difficulty if specified
98
+ if mode is not None:
99
+ self.ale.setMode(mode)
100
+ if difficulty is not None:
101
+ self.ale.setDifficulty(difficulty)
102
+
103
+ # Get action set
104
+ if full_action_space:
105
+ self._action_set = self.ale.getLegalActionSet()
106
+ else:
107
+ self._action_set = self.ale.getMinimalActionSet()
108
+
109
+ # Get screen dimensions for observation space
110
+ self.screen_height, self.screen_width = self.ale.getScreenDims()
111
+ if obs_type == "rgb":
112
+ self.screen_shape = [self.screen_height, self.screen_width, 3]
113
+ elif obs_type == "grayscale":
114
+ self.screen_shape = [self.screen_height, self.screen_width]
115
+ elif obs_type == "ram":
116
+ self.screen_shape = [self.ale.getRAMSize()]
117
+ else:
118
+ raise ValueError(f"Invalid obs_type: {obs_type}")
119
+
120
+ # Initialize state
121
+ self._state = AtariState(
122
+ game_name=game_name,
123
+ obs_type=obs_type,
124
+ full_action_space=full_action_space,
125
+ mode=mode,
126
+ difficulty=difficulty,
127
+ repeat_action_probability=repeat_action_probability,
128
+ frameskip=frameskip,
129
+ )
130
+
131
+ def reset(self) -> Observation:
132
+ """
133
+ Reset the environment and return initial observation.
134
+
135
+ Returns:
136
+ Initial observation for the agent.
137
+ """
138
+ # Reset ALE
139
+ self.ale.reset_game()
140
+
141
+ # Reset state tracking
142
+ self._state.episode_id = str(uuid.uuid4())
143
+ self._state.step_count = 0
144
+
145
+ # Get initial observation
146
+ return self._make_observation()
147
+
148
+ def step(self, action: Action) -> Observation:
149
+ """
150
+ Execute agent's action and return resulting observation.
151
+
152
+ Args:
153
+ action: AtariAction containing the action_id to execute.
154
+
155
+ Returns:
156
+ Observation after action execution.
157
+
158
+ Raises:
159
+ ValueError: If action is not an AtariAction.
160
+ """
161
+ if not isinstance(action, AtariAction):
162
+ raise ValueError(f"Expected AtariAction, got {type(action)}")
163
+
164
+ # Validate action_id
165
+ if action.action_id < 0 or action.action_id >= len(self._action_set):
166
+ raise ValueError(
167
+ f"Invalid action_id: {action.action_id}. "
168
+ f"Valid range: [0, {len(self._action_set) - 1}]"
169
+ )
170
+
171
+ # Get actual ALE action
172
+ ale_action = self._action_set[action.action_id]
173
+
174
+ # Execute action with frameskip
175
+ total_reward = 0.0
176
+ for _ in range(self.frameskip):
177
+ total_reward += self.ale.act(ale_action)
178
+ if self.ale.game_over():
179
+ break
180
+
181
+ self._state.step_count += 1
182
+
183
+ # Get observation
184
+ obs = self._make_observation()
185
+ obs.reward = total_reward
186
+
187
+ return obs
188
+
189
+ @property
190
+ def state(self) -> AtariState:
191
+ """Get current environment state."""
192
+ return self._state
193
+
194
+ def _make_observation(self) -> AtariObservation:
195
+ """
196
+ Create an AtariObservation from current ALE state.
197
+
198
+ Returns:
199
+ AtariObservation for the agent.
200
+ """
201
+ # Get screen observation
202
+ if self.obs_type == "rgb":
203
+ screen = self.ale.getScreenRGB()
204
+ elif self.obs_type == "grayscale":
205
+ screen = self.ale.getScreenGrayscale()
206
+ elif self.obs_type == "ram":
207
+ screen = self.ale.getRAM()
208
+ else:
209
+ raise ValueError(f"Invalid obs_type: {self.obs_type}")
210
+
211
+ # Flatten screen for JSON serialization
212
+ # Handle both numpy arrays and lists
213
+ if hasattr(screen, "flatten"):
214
+ screen_flat = screen.flatten().tolist()
215
+ elif hasattr(screen, "tolist"):
216
+ screen_flat = screen.tolist()
217
+ else:
218
+ screen_flat = list(screen)
219
+
220
+ # Get game info
221
+ lives = self.ale.lives()
222
+ episode_frame_number = self.ale.getEpisodeFrameNumber()
223
+ frame_number = self.ale.getFrameNumber()
224
+ done = self.ale.game_over()
225
+
226
+ # Create legal actions list (indices into action_set)
227
+ legal_actions = list(range(len(self._action_set)))
228
+
229
+ # Create observation
230
+ obs = AtariObservation(
231
+ screen=screen_flat,
232
+ screen_shape=self.screen_shape,
233
+ legal_actions=legal_actions,
234
+ lives=lives,
235
+ episode_frame_number=episode_frame_number,
236
+ frame_number=frame_number,
237
+ done=done,
238
+ reward=0.0, # Will be filled in by step()
239
+ metadata={
240
+ "game_name": self.game_name,
241
+ "action_meanings": [str(a) for a in self._action_set],
242
+ },
243
+ )
244
+
245
+ return obs
src/envs/atari_env/test_atari_docker.sh ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Comprehensive Docker test for Atari environment
3
+ # Tests: Build, Start, Health, Reset, Step, State, Cleanup
4
+
5
+ set -e # Exit on error
6
+
7
+ # Colors for output
8
+ RED='\033[0;31m'
9
+ GREEN='\033[0;32m'
10
+ YELLOW='\033[1;33m'
11
+ BLUE='\033[0;34m'
12
+ NC='\033[0m' # No Color
13
+
14
+ # Configuration
15
+ IMAGE_NAME="atari-env"
16
+ IMAGE_TAG="test"
17
+ CONTAINER_NAME="atari-env-test"
18
+ PORT="8765" # Use non-standard port to avoid conflicts
19
+ HEALTH_RETRIES=30
20
+ HEALTH_DELAY=2
21
+
22
+ # Cleanup function
23
+ cleanup() {
24
+ echo -e "\n${BLUE}Cleaning up...${NC}"
25
+ docker stop ${CONTAINER_NAME} 2>/dev/null || true
26
+ docker rm ${CONTAINER_NAME} 2>/dev/null || true
27
+ echo -e "${GREEN}βœ“${NC} Cleanup complete"
28
+ }
29
+
30
+ # Set trap to cleanup on exit
31
+ trap cleanup EXIT
32
+
33
+ # Header
34
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
35
+ echo " ATARI ENVIRONMENT DOCKER TEST"
36
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
37
+ echo ""
38
+
39
+ # Check prerequisites
40
+ echo -e "${BLUE}Checking prerequisites...${NC}"
41
+ if ! command -v docker &> /dev/null; then
42
+ echo -e "${RED}βœ—${NC} Docker is not installed"
43
+ exit 1
44
+ fi
45
+ echo -e "${GREEN}βœ“${NC} Docker is installed"
46
+
47
+ if ! command -v curl &> /dev/null; then
48
+ echo -e "${RED}βœ—${NC} curl is not installed"
49
+ exit 1
50
+ fi
51
+ echo -e "${GREEN}βœ“${NC} curl is installed"
52
+
53
+ # Check if we're in the right directory
54
+ if [ ! -f "src/envs/atari_env/server/Dockerfile" ]; then
55
+ echo -e "${RED}βœ—${NC} Must run from OpenEnv root directory"
56
+ exit 1
57
+ fi
58
+ echo -e "${GREEN}βœ“${NC} In correct directory"
59
+
60
+ # Step 1: Build Docker image
61
+ echo ""
62
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
63
+ echo -e "${BLUE}STEP 1: Building Docker Image${NC}"
64
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
65
+
66
+ echo "Building ${IMAGE_NAME}:${IMAGE_TAG}..."
67
+ if docker build -f src/envs/atari_env/server/Dockerfile -t ${IMAGE_NAME}:${IMAGE_TAG} . 2>&1 | tee /tmp/atari_build.log | tail -n 20; then
68
+ echo -e "${GREEN}βœ“${NC} Docker image built successfully"
69
+ else
70
+ echo -e "${RED}βœ—${NC} Docker build failed"
71
+ echo "See /tmp/atari_build.log for full output"
72
+ exit 1
73
+ fi
74
+
75
+ # Check image exists
76
+ if docker image inspect ${IMAGE_NAME}:${IMAGE_TAG} &> /dev/null; then
77
+ IMAGE_SIZE=$(docker image inspect ${IMAGE_NAME}:${IMAGE_TAG} --format='{{.Size}}' | awk '{print $1/1024/1024}')
78
+ echo -e "${GREEN}βœ“${NC} Image size: ${IMAGE_SIZE} MB"
79
+ else
80
+ echo -e "${RED}βœ—${NC} Image not found after build"
81
+ exit 1
82
+ fi
83
+
84
+ # Step 2: Start container
85
+ echo ""
86
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
87
+ echo -e "${BLUE}STEP 2: Starting Container${NC}"
88
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
89
+
90
+ # Clean up any existing container
91
+ docker rm -f ${CONTAINER_NAME} 2>/dev/null || true
92
+
93
+ echo "Starting container on port ${PORT}..."
94
+ docker run -d \
95
+ --name ${CONTAINER_NAME} \
96
+ -p ${PORT}:8000 \
97
+ -e ATARI_GAME=pong \
98
+ -e ATARI_OBS_TYPE=ram \
99
+ -e ATARI_FRAMESKIP=4 \
100
+ ${IMAGE_NAME}:${IMAGE_TAG}
101
+
102
+ if [ $? -eq 0 ]; then
103
+ echo -e "${GREEN}βœ“${NC} Container started: ${CONTAINER_NAME}"
104
+ else
105
+ echo -e "${RED}βœ—${NC} Failed to start container"
106
+ exit 1
107
+ fi
108
+
109
+ # Wait for container to be running
110
+ sleep 2
111
+ if docker ps | grep -q ${CONTAINER_NAME}; then
112
+ echo -e "${GREEN}βœ“${NC} Container is running"
113
+ else
114
+ echo -e "${RED}βœ—${NC} Container is not running"
115
+ docker logs ${CONTAINER_NAME}
116
+ exit 1
117
+ fi
118
+
119
+ # Step 3: Wait for health check
120
+ echo ""
121
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
122
+ echo -e "${BLUE}STEP 3: Waiting for Server${NC}"
123
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
124
+
125
+ echo "Waiting for server to be ready (timeout: ${HEALTH_RETRIES}s)..."
126
+ for i in $(seq 1 ${HEALTH_RETRIES}); do
127
+ if curl -s http://localhost:${PORT}/health > /dev/null 2>&1; then
128
+ echo -e "${GREEN}βœ“${NC} Server is ready (${i}s)"
129
+ break
130
+ fi
131
+
132
+ if [ $i -eq ${HEALTH_RETRIES} ]; then
133
+ echo -e "${RED}βœ—${NC} Server did not become ready in time"
134
+ echo "Container logs:"
135
+ docker logs ${CONTAINER_NAME}
136
+ exit 1
137
+ fi
138
+
139
+ echo -n "."
140
+ sleep ${HEALTH_DELAY}
141
+ done
142
+
143
+ # Step 4: Test health endpoint
144
+ echo ""
145
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
146
+ echo -e "${BLUE}STEP 4: Testing Health Endpoint${NC}"
147
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
148
+
149
+ HEALTH_RESPONSE=$(curl -s http://localhost:${PORT}/health)
150
+ echo "Response: ${HEALTH_RESPONSE}"
151
+
152
+ if echo "${HEALTH_RESPONSE}" | grep -q "healthy"; then
153
+ echo -e "${GREEN}βœ“${NC} Health endpoint working"
154
+ else
155
+ echo -e "${RED}βœ—${NC} Health endpoint failed"
156
+ exit 1
157
+ fi
158
+
159
+ # Step 5: Test reset endpoint
160
+ echo ""
161
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
162
+ echo -e "${BLUE}STEP 5: Testing Reset Endpoint${NC}"
163
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
164
+
165
+ RESET_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/reset -H "Content-Type: application/json" -d '{}')
166
+
167
+ if [ -z "${RESET_RESPONSE}" ]; then
168
+ echo -e "${RED}βœ—${NC} Reset endpoint returned empty response"
169
+ docker logs ${CONTAINER_NAME} | tail -20
170
+ exit 1
171
+ fi
172
+
173
+ echo "Response (first 200 chars): ${RESET_RESPONSE:0:200}..."
174
+
175
+ # Check if response contains expected fields
176
+ if echo "${RESET_RESPONSE}" | grep -q "observation" && \
177
+ echo "${RESET_RESPONSE}" | grep -q "screen" && \
178
+ echo "${RESET_RESPONSE}" | grep -q "legal_actions"; then
179
+ echo -e "${GREEN}βœ“${NC} Reset endpoint working"
180
+
181
+ # Extract some info
182
+ SCREEN_LEN=$(echo "${RESET_RESPONSE}" | grep -o '"screen":\[[^]]*\]' | wc -c)
183
+ echo " Screen data length: ${SCREEN_LEN} chars"
184
+ else
185
+ echo -e "${RED}βœ—${NC} Reset response missing required fields"
186
+ echo "Full response: ${RESET_RESPONSE}"
187
+ exit 1
188
+ fi
189
+
190
+ # Step 6: Test step endpoint
191
+ echo ""
192
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
193
+ echo -e "${BLUE}STEP 6: Testing Step Endpoint${NC}"
194
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
195
+
196
+ STEP_PAYLOAD='{"action": {"action_id": 0, "game_name": "pong"}}'
197
+ STEP_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/step -H "Content-Type: application/json" -d "${STEP_PAYLOAD}")
198
+
199
+ if [ -z "${STEP_RESPONSE}" ]; then
200
+ echo -e "${RED}βœ—${NC} Step endpoint returned empty response"
201
+ docker logs ${CONTAINER_NAME} | tail -20
202
+ exit 1
203
+ fi
204
+
205
+ echo "Response (first 200 chars): ${STEP_RESPONSE:0:200}..."
206
+
207
+ # Check if response contains expected fields
208
+ if echo "${STEP_RESPONSE}" | grep -q "observation" && \
209
+ echo "${STEP_RESPONSE}" | grep -q "reward" && \
210
+ echo "${STEP_RESPONSE}" | grep -q "done"; then
211
+ echo -e "${GREEN}βœ“${NC} Step endpoint working"
212
+
213
+ # Extract reward and done
214
+ REWARD=$(echo "${STEP_RESPONSE}" | grep -o '"reward":[^,}]*' | cut -d: -f2)
215
+ DONE=$(echo "${STEP_RESPONSE}" | grep -o '"done":[^,}]*' | cut -d: -f2)
216
+ echo " Reward: ${REWARD}"
217
+ echo " Done: ${DONE}"
218
+ else
219
+ echo -e "${RED}βœ—${NC} Step response missing required fields"
220
+ echo "Full response: ${STEP_RESPONSE}"
221
+ exit 1
222
+ fi
223
+
224
+ # Step 7: Test state endpoint
225
+ echo ""
226
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
227
+ echo -e "${BLUE}STEP 7: Testing State Endpoint${NC}"
228
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
229
+
230
+ STATE_RESPONSE=$(curl -s http://localhost:${PORT}/state)
231
+
232
+ if [ -z "${STATE_RESPONSE}" ]; then
233
+ echo -e "${RED}βœ—${NC} State endpoint returned empty response"
234
+ docker logs ${CONTAINER_NAME} | tail -20
235
+ exit 1
236
+ fi
237
+
238
+ echo "Response: ${STATE_RESPONSE}"
239
+
240
+ # Check if response contains expected fields
241
+ if echo "${STATE_RESPONSE}" | grep -q "episode_id" && \
242
+ echo "${STATE_RESPONSE}" | grep -q "step_count" && \
243
+ echo "${STATE_RESPONSE}" | grep -q "game_name"; then
244
+ echo -e "${GREEN}βœ“${NC} State endpoint working"
245
+
246
+ # Extract info
247
+ GAME_NAME=$(echo "${STATE_RESPONSE}" | grep -o '"game_name":"[^"]*"' | cut -d'"' -f4)
248
+ STEP_COUNT=$(echo "${STATE_RESPONSE}" | grep -o '"step_count":[^,}]*' | cut -d: -f2)
249
+ echo " Game: ${GAME_NAME}"
250
+ echo " Steps: ${STEP_COUNT}"
251
+ else
252
+ echo -e "${RED}βœ—${NC} State response missing required fields"
253
+ echo "Full response: ${STATE_RESPONSE}"
254
+ exit 1
255
+ fi
256
+
257
+ # Step 8: Test multiple steps
258
+ echo ""
259
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
260
+ echo -e "${BLUE}STEP 8: Testing Multiple Steps${NC}"
261
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
262
+
263
+ echo "Taking 10 steps..."
264
+ TOTAL_REWARD=0
265
+ for i in {1..10}; do
266
+ ACTION_ID=$((RANDOM % 3)) # Random action 0-2
267
+ STEP_PAYLOAD="{\"action\": {\"action_id\": ${ACTION_ID}, \"game_name\": \"pong\"}}"
268
+ STEP_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/step -H "Content-Type: application/json" -d "${STEP_PAYLOAD}")
269
+
270
+ if ! echo "${STEP_RESPONSE}" | grep -q "observation"; then
271
+ echo -e "${RED}βœ—${NC} Step ${i} failed"
272
+ exit 1
273
+ fi
274
+
275
+ REWARD=$(echo "${STEP_RESPONSE}" | grep -o '"reward":[^,}]*' | cut -d: -f2 | sed 's/null/0/')
276
+ DONE=$(echo "${STEP_RESPONSE}" | grep -o '"done":[^,}]*' | cut -d: -f2)
277
+
278
+ echo " Step ${i}: action=${ACTION_ID}, reward=${REWARD}, done=${DONE}"
279
+
280
+ if [ "${DONE}" = "true" ]; then
281
+ echo " Episode completed early at step ${i}"
282
+ break
283
+ fi
284
+ done
285
+
286
+ echo -e "${GREEN}βœ“${NC} Multiple steps completed successfully"
287
+
288
+ # Step 9: Check container logs for errors
289
+ echo ""
290
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
291
+ echo -e "${BLUE}STEP 9: Checking Container Logs${NC}"
292
+ echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
293
+
294
+ LOGS=$(docker logs ${CONTAINER_NAME} 2>&1)
295
+
296
+ if echo "${LOGS}" | grep -i "error" | grep -v "LoggerMode.Error"; then
297
+ echo -e "${YELLOW}⚠${NC} Found errors in logs:"
298
+ echo "${LOGS}" | grep -i "error" | head -5
299
+ else
300
+ echo -e "${GREEN}βœ“${NC} No errors in container logs"
301
+ fi
302
+
303
+ if echo "${LOGS}" | grep -i "exception"; then
304
+ echo -e "${RED}βœ—${NC} Found exceptions in logs:"
305
+ echo "${LOGS}" | grep -i "exception" | head -5
306
+ exit 1
307
+ else
308
+ echo -e "${GREEN}βœ“${NC} No exceptions in container logs"
309
+ fi
310
+
311
+ # Final Summary
312
+ echo ""
313
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
314
+ echo -e "${GREEN}βœ… ALL DOCKER TESTS PASSED${NC}"
315
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
316
+ echo ""
317
+ echo "Summary:"
318
+ echo " βœ“ Docker image built successfully"
319
+ echo " βœ“ Container started and ran"
320
+ echo " βœ“ Health endpoint working"
321
+ echo " βœ“ Reset endpoint working"
322
+ echo " βœ“ Step endpoint working"
323
+ echo " βœ“ State endpoint working"
324
+ echo " βœ“ Multiple steps working"
325
+ echo " βœ“ No errors or exceptions"
326
+ echo ""
327
+ echo "Image: ${IMAGE_NAME}:${IMAGE_TAG}"
328
+ echo "Container: ${CONTAINER_NAME}"
329
+ echo "Port: ${PORT}"
330
+ echo ""
331
+ echo "To keep container running: docker start ${CONTAINER_NAME}"
332
+ echo "To view logs: docker logs ${CONTAINER_NAME}"
333
+ echo ""