pacman

Running

App Files Files Community

Zach Wentz committed on 18 days ago

Commit

d0ae716

1 Parent(s): 2ed0abb

🤖 Deploy atari_env environment - 2025-10-19 22:32:33

Browse files

Files changed (47) hide show

.gitattributes +0 -35
Dockerfile +49 -0
README.md +44 -5
src/core/__init__.py +19 -0
src/core/__pycache__/__init__.cpython-311.pyc +0 -0
src/core/__pycache__/__init__.cpython-313.pyc +0 -0
src/core/__pycache__/http_env_client.cpython-311.pyc +0 -0
src/core/__pycache__/types.cpython-311.pyc +0 -0
src/core/containers/__init__.py +7 -0
src/core/containers/__pycache__/__init__.cpython-311.pyc +0 -0
src/core/containers/images/Dockerfile +46 -0
src/core/containers/images/README.md +92 -0
src/core/containers/runtime/__init__.py +15 -0
src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc +0 -0
src/core/containers/runtime/__pycache__/providers.cpython-311.pyc +0 -0
src/core/containers/runtime/providers.py +289 -0
src/core/containers/test_local_docker_provider.py +258 -0
src/core/env_server/__init__.py +35 -0
src/core/env_server/__pycache__/__init__.cpython-311.pyc +0 -0
src/core/env_server/__pycache__/__init__.cpython-313.pyc +0 -0
src/core/env_server/__pycache__/base_transforms.cpython-311.pyc +0 -0
src/core/env_server/__pycache__/base_transforms.cpython-313.pyc +0 -0
src/core/env_server/__pycache__/http_server.cpython-311.pyc +0 -0
src/core/env_server/__pycache__/http_server.cpython-313.pyc +0 -0
src/core/env_server/__pycache__/interfaces.cpython-311.pyc +0 -0
src/core/env_server/__pycache__/interfaces.cpython-313.pyc +0 -0
src/core/env_server/__pycache__/types.cpython-311.pyc +0 -0
src/core/env_server/__pycache__/types.cpython-313.pyc +0 -0
src/core/env_server/__pycache__/web_interface.cpython-311.pyc +0 -0
src/core/env_server/base_transforms.py +29 -0
src/core/env_server/http_server.py +231 -0
src/core/env_server/interfaces.py +118 -0
src/core/env_server/types.py +45 -0
src/core/env_server/web_interface.py +764 -0
src/core/http_env_client.py +175 -0
src/core/tools/__init__.py +11 -0
src/core/tools/local_python_executor.py +105 -0
src/core/types.py +22 -0
src/envs/atari_env/README.md +383 -0
src/envs/atari_env/__init__.py +31 -0
src/envs/atari_env/client.py +118 -0
src/envs/atari_env/models.py +86 -0
src/envs/atari_env/server/Dockerfile +43 -0
src/envs/atari_env/server/__init__.py +15 -0
src/envs/atari_env/server/app.py +73 -0
src/envs/atari_env/server/atari_environment.py +245 -0
src/envs/atari_env/test_atari_docker.sh +333 -0

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,49 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build: First stage builds the base image
+FROM python:3.11-slim AS base-builder
+# Install system dependencies (curl is required by the HEALTHCHECK below)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies that all environments need.
+# Every requirement specifier is quoted: in a shell-form RUN an unquoted
+# "fastapi>=0.104.0" is parsed as "fastapi" plus a redirection of stdout to a
+# file named "=0.104.0", so the version constraint would be silently dropped.
+RUN pip install --no-cache-dir \
+    "fastapi>=0.104.0" \
+    "uvicorn[standard]>=0.24.0" \
+    "requests>=2.25.0" \
+    "wsproto>=1.0.0"
+# Set working directory
+WORKDIR /app
+# Default environment variables
+ENV PYTHONPATH=/app/src
+ENV PYTHONUNBUFFERED=1
+# Second stage: Use the built base image and add environment-specific dependencies
+FROM base-builder
+# Install ALE-specific dependencies (quoted for the same reason as above)
+RUN pip install --no-cache-dir \
+    "gymnasium>=0.29.0" \
+    "ale-py>=0.8.0" \
+    "numpy>=1.24.0"
+# Copy only what's needed for this environment
+COPY src/core/ /app/src/core/
+COPY src/envs/atari_env/ /app/src/envs/atari_env/
+# Document the port uvicorn listens on (EXPOSE does not publish the port)
+EXPOSE 8000
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+ENV ENABLE_WEB_INTERFACE=true

README.md CHANGED Viewed

@@ -1,10 +1,49 @@
 ---
-title: Atari Env
-emoji: 🦀
-colorFrom: red
-colorTo: gray
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Atari_env Environment Server
+emoji: 🐳
+colorFrom: blue
+colorTo: green
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
 ---
+# Atari_env Environment Server
+FastAPI server for atari_env environment powered by Meta's OpenEnv.
+## About
+This Space provides a containerized environment for atari_env interactions.
+Built with FastAPI and OpenEnv framework.
+## Web Interface
+This deployment includes an interactive web interface for exploring the environment:
+- **HumanAgent Interface**: Interact with the environment using a web form
+- **State Observer**: Real-time view of environment state and action history
+- **Live Updates**: WebSocket-based real-time updates
+Access the web interface at: `/web`
+## Atari Environment
+Provides Atari 2600 games via the Arcade Learning Environment (ALE).
+### Usage
+Send a POST request to `/step` with:
+```json
+{
+  "action_id": 0,
+  "game_name": "pong"
+}
+```
+## API Documentation
+Visit `/docs` for interactive API documentation.
+## Health Check
+The environment provides a health check endpoint at `/health`.

src/core/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Core components for agentic environments."""
+# Re-export main components from submodules for convenience
+from .env_server import *
+from .http_env_client import HTTPEnvClient
+from .types import StepResult
+# Note: MCP module doesn't export anything yet
+__all__ = [
+    "HTTPEnvClient",
+    "StepResult",
+]

src/core/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (400 Bytes). View file

src/core/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (383 Bytes). View file

src/core/__pycache__/http_env_client.cpython-311.pyc ADDED Viewed

Binary file (7.68 kB). View file

src/core/__pycache__/types.cpython-311.pyc ADDED Viewed

Binary file (1.09 kB). View file

src/core/containers/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Container management for environment servers."""

src/core/containers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (206 Bytes). View file

src/core/containers/images/Dockerfile ADDED Viewed

	@@ -0,0 +1,46 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# OpenEnv Base Image
+#
+# This is the standard base image for all OpenEnv environment servers.
+# It includes the minimal dependencies needed to run HTTP environment servers.
+#
+# Build: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
+# Tag:   docker tag openenv-base:latest openenv-base:0.1.0
+#
+FROM python:3.11-slim
+# Set metadata
+LABEL maintainer="OpenEnv Team"
+LABEL description="Base image for OpenEnv based environment servers"
+LABEL version="0.1.0"
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies that all environments need.
+# Every requirement specifier is quoted: in a shell-form RUN an unquoted
+# "fastapi>=0.104.0" is parsed as "fastapi" plus a redirection of stdout to a
+# file named "=0.104.0", so the version constraint would be silently dropped.
+RUN pip install --no-cache-dir \
+    "fastapi>=0.104.0" \
+    "uvicorn[standard]>=0.24.0" \
+    "requests>=2.25.0" \
+    "wsproto>=1.0.0"
+# Set working directory
+WORKDIR /app
+# Default environment variables
+ENV PYTHONPATH=/app/src
+ENV PYTHONUNBUFFERED=1
+# Default expose port (can be overridden)
+EXPOSE 8000
+# Note: CMD should be specified in child Dockerfiles

src/core/containers/images/README.md ADDED Viewed

	@@ -0,0 +1,92 @@

+# OpenEnv Base Image
+Standard base image for all OpenEnv environment servers.
+## What's Included
+| Layer | Size | Contents |
+|-------|------|----------|
+| python:3.11-slim | 200 MB  | Base Python runtime |
+| + Dependencies   | 100 MB  | FastAPI, uvicorn, requests |
+| **Total**        | **~300 MB** | Ready for environment servers |
+## Image Sizes
+```
+openenv-base:latest   300 MB  (python + fastapi + uvicorn)
+```
+### Without Base Images (❌ Problem)
+```
+echo-env:latest        500 MB  (python + fastapi + uvicorn + app)
+coding-env:latest      520 MB  (python + fastapi + uvicorn + app + tools)
+another-env:latest     510 MB  (python + fastapi + uvicorn + app)
+---
+Total: 1.5 GB (with lots of duplication)
+```
+### With Base Images (✅ Solution)
+```
+openenv-base:latest    300 MB  (python + fastapi + uvicorn)
+echo-env:latest         50 MB  (app only, uses base)
+coding-env:latest       70 MB  (app + tools, uses base)
+another-env:latest      45 MB  (app only, uses base)
+---
+Total: 465 MB (base shared, minimal duplication)
+```
+## Building the Base Image
+```bash
+# From project root
+docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
+```
+## Usage in Environment Dockerfiles
+Each environment Dockerfile should start with:
+```dockerfile
+FROM openenv-base:latest
+# Copy only environment-specific files
+COPY src/core/ /app/src/core/
+COPY src/envs/my_env/ /app/src/envs/my_env/
+# Run the server
+CMD ["uvicorn", "envs.my_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+```
+## Base Image Contents
+- Python 3.11-slim
+- FastAPI >= 0.104.0
+- Uvicorn >= 0.24.0
+- Requests >= 2.25.0
+- curl (for health checks)
+## Example: Building Echo Environment
+```bash
+# Step 1: Build base image (do this once)
+docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
+# Step 2: Build echo environment (uses base)
+docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
+# Step 3: Run echo environment
+docker run -p 8000:8000 echo-env:latest
+```
+## Updating the Base
+When dependencies need updating:
+1. Update `src/core/containers/images/Dockerfile`
+2. Rebuild base image
+3. Rebuild all environment images (they'll use new base)
+```bash
+# Update base
+docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
+# Rebuild environments (they automatically use new base)
+docker build -t echo-env:latest -f src/envs/echo_env/server/Dockerfile .
+```

src/core/containers/runtime/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Container runtime providers."""
+from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
+__all__ = [
+    "ContainerProvider",
+    "LocalDockerProvider",
+    "KubernetesProvider",
+]

src/core/containers/runtime/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (389 Bytes). View file

src/core/containers/runtime/__pycache__/providers.cpython-311.pyc ADDED Viewed

Binary file (10.9 kB). View file

src/core/containers/runtime/providers.py ADDED Viewed

	@@ -0,0 +1,289 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Container provider abstractions for running environment servers.
+This module provides a pluggable architecture for different container providers
+(local Docker, Kubernetes, cloud providers, etc.) to be used with HTTPEnvClient.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+class ContainerProvider(ABC):
+    """
+    Abstract base class for container providers.
+    Providers implement this interface to support different container platforms:
+    - LocalDockerProvider: Runs containers on local Docker daemon
+    - KubernetesProvider: Runs containers in Kubernetes cluster
+    - FargateProvider: Runs containers on AWS Fargate
+    - CloudRunProvider: Runs containers on Google Cloud Run
+    The provider manages a single container lifecycle and provides the base URL
+    for connecting to it.
+    Example:
+        >>> provider = LocalDockerProvider()
+        >>> base_url = provider.start_container("echo-env:latest")
+        >>> print(base_url)  # http://localhost:8000
+        >>> # Use the environment via base_url
+        >>> provider.stop_container()
+    """
+    @abstractmethod
+    def start_container(
+        self,
+        image: str,
+        port: Optional[int] = None,
+        env_vars: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Start a container from the specified image.
+        Args:
+            image: Container image name (e.g., "echo-env:latest")
+            port: Port to expose (if None, provider chooses)
+            env_vars: Environment variables to pass to container
+            **kwargs: Provider-specific options
+        Returns:
+            Base URL to connect to the container (e.g., "http://localhost:8000")
+        Raises:
+            RuntimeError: If container fails to start
+        """
+        pass
+    @abstractmethod
+    def stop_container(self) -> None:
+        """
+        Stop and remove the running container.
+        This cleans up the container that was started by start_container().
+        """
+        pass
+    @abstractmethod
+    def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
+        """
+        Wait for the container to be ready to accept requests.
+        This typically polls the /health endpoint until it returns 200.
+        Args:
+            base_url: Base URL of the container
+            timeout_s: Maximum time to wait
+        Raises:
+            TimeoutError: If container doesn't become ready in time
+        """
+        pass
+class LocalDockerProvider(ContainerProvider):
+    """
+    Container provider for local Docker daemon.
+    This provider runs containers on the local machine using Docker.
+    Useful for development and testing.
+    Example:
+        >>> provider = LocalDockerProvider()
+        >>> base_url = provider.start_container("echo-env:latest")
+        >>> # Container running on http://localhost:<random-port>
+        >>> provider.stop_container()
+    """
+    def __init__(self):
+        """Initialize the local Docker provider."""
+        self._container_id: Optional[str] = None
+        self._container_name: Optional[str] = None
+        # Check if Docker is available
+        import subprocess
+        try:
+            subprocess.run(
+                ["docker", "version"],
+                check=True,
+                capture_output=True,
+                timeout=5,
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            raise RuntimeError(
+                "Docker is not available. Please install Docker Desktop or Docker Engine."
+            )
+    def start_container(
+        self,
+        image: str,
+        port: Optional[int] = None,
+        env_vars: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Start a Docker container locally.
+        Args:
+            image: Docker image name
+            port: Port to expose (if None, finds available port)
+            env_vars: Environment variables for the container
+            **kwargs: Additional Docker run options
+        Returns:
+            Base URL to connect to the container
+        """
+        import subprocess
+        import time
+        # Find available port if not specified
+        if port is None:
+            port = self._find_available_port()
+        # Generate container name
+        self._container_name = self._generate_container_name(image)
+        # Build docker run command
+        cmd = [
+            "docker", "run",
+            "-d",  # Detached
+            "--name", self._container_name,
+            "-p", f"{port}:8000",  # Map port
+        ]
+        # Add environment variables
+        if env_vars:
+            for key, value in env_vars.items():
+                cmd.extend(["-e", f"{key}={value}"])
+        # Add image
+        cmd.append(image)
+        # Run container
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        self._container_id = result.stdout.strip()
+        # Wait a moment for container to start
+        time.sleep(1)
+        base_url = f"http://localhost:{port}"
+        return base_url
+    def stop_container(self) -> None:
+        """
+        Stop and remove the Docker container.
+
+        Does nothing when no container has been started. Cleanup is
+        best-effort: a failing or timed-out ``docker stop`` does not prevent
+        the removal attempt, and the stored container id/name are always
+        cleared afterwards.
+        """
+        if self._container_id is None:
+            return
+        import subprocess
+        container_id = self._container_id
+        try:
+            # Run both steps independently so that a failed or slow stop never
+            # skips the removal. "rm -f" force-removes the container even if
+            # it is still running, so it cannot be leaked by a failed stop.
+            for docker_cmd in (
+                ["docker", "stop", container_id],
+                ["docker", "rm", "-f", container_id],
+            ):
+                try:
+                    subprocess.run(
+                        docker_cmd,
+                        capture_output=True,
+                        check=True,
+                        timeout=10,
+                    )
+                except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+                    # Container might already be stopped/removed, or the
+                    # Docker CLI did not answer in time; keep cleaning up.
+                    continue
+        finally:
+            self._container_id = None
+            self._container_name = None
+    def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
+        """
+        Wait for container to be ready by polling /health endpoint.
+        Args:
+            base_url: Base URL of the container
+            timeout_s: Maximum time to wait
+        Raises:
+            TimeoutError: If container doesn't become ready
+        """
+        import time
+        import requests
+        start_time = time.time()
+        health_url = f"{base_url}/health"
+        while time.time() - start_time < timeout_s:
+            try:
+                response = requests.get(health_url, timeout=2.0)
+                if response.status_code == 200:
+                    return
+            except requests.RequestException:
+                pass
+            time.sleep(0.5)
+        raise TimeoutError(
+            f"Container at {base_url} did not become ready within {timeout_s}s"
+        )
+    def _find_available_port(self) -> int:
+        """
+        Find an available port on localhost.
+        Returns:
+            An available port number
+        """
+        import socket
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind(("", 0))
+            s.listen(1)
+            port = s.getsockname()[1]
+        return port
+    def _generate_container_name(self, image: str) -> str:
+        """
+        Generate a unique container name based on image name and timestamp.
+        Args:
+            image: Docker image name
+        Returns:
+            A unique container name
+        """
+        import time
+        clean_image = image.split("/")[-1].split(":")[0]
+        timestamp = int(time.time() * 1000)
+        return f"{clean_image}-{timestamp}"
+class KubernetesProvider(ContainerProvider):
+    """
+    Container provider for Kubernetes clusters.
+    This provider creates pods in a Kubernetes cluster and exposes them
+    via services or port-forwarding.
+    Example:
+        >>> provider = KubernetesProvider(namespace="envtorch-dev")
+        >>> base_url = provider.start_container("echo-env:latest")
+        >>> # Pod running in k8s, accessible via service or port-forward
+        >>> provider.stop_container()
+    """
+    pass

src/core/containers/test_local_docker_provider.py ADDED Viewed

	@@ -0,0 +1,258 @@

+#!/usr/bin/env python3
+"""
+End-to-end test for LocalDockerProvider.
+This script tests the complete flow:
+1. Start a container using LocalDockerProvider
+2. Wait for it to be ready
+3. Make HTTP requests to test the environment
+4. Clean up the container
+"""
+import sys
+from pathlib import Path
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+import requests
+from core.containers.runtime import LocalDockerProvider
+# TODO: Remove this test or make it a functional test, since this will be covered by the e2e test for the echo env
+def test_local_docker_provider():
+    """Test LocalDockerProvider end-to-end."""
+    print("=" * 60)
+    print("LocalDockerProvider End-to-End Test")
+    print("=" * 60)
+    print()
+    provider = None
+    try:
+        # Step 1: Create provider
+        print("Step 1: Creating LocalDockerProvider...")
+        provider = LocalDockerProvider()
+        print("✓ Provider created\n")
+        # Step 2: Start container
+        print("Step 2: Starting echo-env container...")
+        base_url = provider.start_container("echo-env:latest")
+        print(f"✓ Container started at: {base_url}")
+        if provider._container_id:
+            print(f"  Container ID: {provider._container_id[:12]}...")
+        if provider._container_name:
+            print(f"  Container name: {provider._container_name}\n")
+        # Step 3: Wait for ready
+        print("Step 3: Waiting for container to be ready...")
+        provider.wait_for_ready(base_url, timeout_s=30.0)
+        print("✓ Container is ready!\n")
+        # Step 4: Test health endpoint
+        print("Step 4: Testing /health endpoint...")
+        response = requests.get(f"{base_url}/health")
+        print(f"  Status: {response.status_code}")
+        print(f"  Response: {response.json()}")
+        assert response.status_code == 200
+        assert response.json()["status"] == "healthy"
+        print("✓ Health check passed\n")
+        # Step 5: Test reset endpoint
+        print("Step 5: Testing /reset endpoint...")
+        response = requests.post(
+            f"{base_url}/reset",
+            json={},
+            headers={"Content-Type": "application/json"},
+        )
+        print(f"  Status: {response.status_code}")
+        data = response.json()
+        print(f"  Message: {data['observation']['echoed_message']}")
+        print(f"  Reward: {data['reward']}")
+        print(f"  Done: {data['done']}")
+        assert response.status_code == 200
+        assert data["observation"]["echoed_message"] == "Echo environment ready!"
+        print("✓ Reset test passed\n")
+        # Step 6: Test step endpoint
+        print("Step 6: Testing /step endpoint...")
+        response = requests.post(
+            f"{base_url}/step",
+            json={"action": {"message": "Hello from LocalDockerProvider!"}},
+            headers={"Content-Type": "application/json"},
+        )
+        print(f"  Status: {response.status_code}")
+        data = response.json()
+        print(f"  Echoed: {data['observation']['echoed_message']}")
+        print(f"  Length: {data['observation']['message_length']}")
+        print(f"  Reward: {data['reward']}")
+        assert response.status_code == 200
+        assert data["observation"]["echoed_message"] == "Hello from LocalDockerProvider!"
+        assert data["observation"]["message_length"] == 31
+        print("✓ Step test passed\n")
+        # Step 7: Test state endpoint
+        print("Step 7: Testing /state endpoint...")
+        response = requests.get(f"{base_url}/state")
+        print(f"  Status: {response.status_code}")
+        data = response.json()
+        print(f"  Episode ID: {data['episode_id']}")
+        print(f"  Step count: {data['step_count']}")
+        assert response.status_code == 200
+        assert data["step_count"] == 1  # One step from above
+        print("✓ State test passed\n")
+        # Step 8: Multiple steps
+        print("Step 8: Testing multiple steps...")
+        for i in range(3):
+            response = requests.post(
+                f"{base_url}/step",
+                json={"action": {"message": f"Message {i+1}"}},
+                headers={"Content-Type": "application/json"},
+            )
+            assert response.status_code == 200
+            print(f"  Step {i+1}: ✓")
+        # Check state updated
+        response = requests.get(f"{base_url}/state")
+        data = response.json()
+        assert data["step_count"] == 4  # 1 + 3 more steps
+        print(f"  Final step count: {data['step_count']}")
+        print("✓ Multiple steps test passed\n")
+        print("=" * 60)
+        print("✓ All tests passed!")
+        print("=" * 60)
+        print()
+        return True
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+    finally:
+        # Step 9: Cleanup
+        if provider is not None:
+            print("\nStep 9: Cleaning up container...")
+            try:
+                provider.stop_container()
+                print("✓ Container stopped and removed\n")
+            except Exception as e:
+                print(f"⚠️  Cleanup warning: {e}\n")
+def test_provider_with_custom_port():
+    """Test provider with custom port."""
+    print("=" * 60)
+    print("LocalDockerProvider with Custom Port Test")
+    print("=" * 60)
+    print()
+    provider = None
+    try:
+        provider = LocalDockerProvider()
+        print("Starting container on custom port 8123...")
+        base_url = provider.start_container("echo-env:latest", port=8123)
+        print(f"✓ Started at: {base_url}")
+        assert ":8123" in base_url
+        print("Waiting for ready...")
+        provider.wait_for_ready(base_url)
+        print("✓ Ready!")
+        print("Testing health...")
+        response = requests.get(f"{base_url}/health")
+        assert response.status_code == 200
+        print("✓ Health check passed")
+        print("\n✓ Custom port test passed!\n")
+        return True
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")
+        return False
+    finally:
+        if provider is not None:
+            provider.stop_container()
+            print("✓ Cleaned up\n")
+def test_provider_with_env_vars():
+    """Test provider with environment variables."""
+    print("=" * 60)
+    print("LocalDockerProvider with Environment Variables Test")
+    print("=" * 60)
+    print()
+    provider = None
+    try:
+        provider = LocalDockerProvider()
+        print("Starting container with environment variables...")
+        base_url = provider.start_container(
+            "echo-env:latest",
+            env_vars={"DEBUG": "true", "LOG_LEVEL": "info"}
+        )
+        print(f"✓ Started at: {base_url}")
+        print("Waiting for ready...")
+        provider.wait_for_ready(base_url)
+        print("✓ Ready!")
+        print("Testing health...")
+        response = requests.get(f"{base_url}/health")
+        assert response.status_code == 200
+        print("✓ Health check passed")
+        print("\n✓ Environment variables test passed!\n")
+        return True
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")
+        return False
+    finally:
+        if provider is not None:
+            provider.stop_container()
+            print("✓ Cleaned up\n")
+if __name__ == "__main__":
+    # Script entry point: run every provider test, print a summary table and
+    # exit with status 0 only when all of them passed.
+    print()
+    print("🐳 LocalDockerProvider Test Suite")
+    print()
+    results = []
+    # Run basic test
+    results.append(("Basic End-to-End", test_local_docker_provider()))
+    # Run custom port test
+    results.append(("Custom Port", test_provider_with_custom_port()))
+    # Run environment variables test
+    results.append(("Environment Variables", test_provider_with_env_vars()))
+    # Summary
+    print("=" * 60)
+    print("Test Summary")
+    print("=" * 60)
+    for name, passed in results:
+        status = "✓ PASSED" if passed else "✗ FAILED"
+        print(f"{name:25} {status}")
+    print("=" * 60)
+    all_passed = all(result for _, result in results)
+    if all_passed:
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n❌ Some tests failed")
+    # Use sys.exit instead of the exit() helper: exit() is injected by the
+    # site module and is not available when the interpreter runs without it.
+    sys.exit(0 if all_passed else 1)

src/core/env_server/__init__.py ADDED Viewed

	@@ -0,0 +1,35 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Core environment interfaces and types."""
+from .base_transforms import CompositeTransform, NullTransform
+from .http_server import HTTPEnvServer, create_app, create_fastapi_app
+from .interfaces import Environment, Message, ModelTokenizer, Transform
+from .types import Action, Observation, State
+from .web_interface import create_web_interface_app, WebInterfaceManager
+__all__ = [
+    # Core interfaces
+    "Environment",
+    "Transform",
+    "Message",
+    "ModelTokenizer",
+    # Types
+    "Action",
+    "Observation",
+    "State",
+    # Base transforms
+    "CompositeTransform",
+    "NullTransform",
+    # HTTP Server
+    "HTTPEnvServer",
+    "create_app",
+    "create_fastapi_app",
+    # Web Interface
+    "create_web_interface_app",
+    "WebInterfaceManager",
+]

src/core/env_server/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (898 Bytes). View file

src/core/env_server/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (940 Bytes). View file

src/core/env_server/__pycache__/base_transforms.cpython-311.pyc ADDED Viewed

Binary file (1.67 kB). View file

src/core/env_server/__pycache__/base_transforms.cpython-313.pyc ADDED Viewed

Binary file (1.57 kB). View file

src/core/env_server/__pycache__/http_server.cpython-311.pyc ADDED Viewed

Binary file (9.2 kB). View file

src/core/env_server/__pycache__/http_server.cpython-313.pyc ADDED Viewed

Binary file (7.14 kB). View file

src/core/env_server/__pycache__/interfaces.cpython-311.pyc ADDED Viewed

Binary file (5.22 kB). View file

src/core/env_server/__pycache__/interfaces.cpython-313.pyc ADDED Viewed

Binary file (4.68 kB). View file

src/core/env_server/__pycache__/types.cpython-311.pyc ADDED Viewed

Binary file (2.39 kB). View file

src/core/env_server/__pycache__/types.cpython-313.pyc ADDED Viewed

Binary file (2.1 kB). View file

src/core/env_server/__pycache__/web_interface.cpython-311.pyc ADDED Viewed

Binary file (29.9 kB). View file

src/core/env_server/base_transforms.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Base transform implementations for composing environment-specific transforms."""
+from .interfaces import Transform
+from .types import Observation
+class CompositeTransform(Transform):
+    """Chains several transforms so that they behave as a single transform.
+    The transforms run in list order; each one receives the observation
+    returned by the one before it.
+    """
+    def __init__(self, transforms: list[Transform]):
+        # The list is stored by reference, not copied.
+        self.transforms = transforms
+    def __call__(self, observation: Observation) -> Observation:
+        """Run the observation through every transform and return the result."""
+        current = observation
+        for apply_step in self.transforms:
+            current = apply_step(current)
+        return current
+class NullTransform(Transform):
+    """Identity transform: returns the observation it is given, unchanged."""
+    def __call__(self, observation: Observation) -> Observation:
+        # No copy is made; the very same object is handed back.
+        return observation

src/core/env_server/http_server.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+HTTP server wrapper for Environment instances.
+This module provides utilities to wrap any Environment subclass and expose it
+over HTTP endpoints that HTTPEnvClient can consume.
+"""
+from __future__ import annotations
+import os
+from dataclasses import asdict
+from typing import Any, Dict, Type
+from .interfaces import Environment
+from .types import Action, Observation
+from fastapi import Body, FastAPI
+class HTTPEnvServer:
+    """
+    HTTP server wrapper for Environment instances.
+    This class wraps an Environment and exposes its reset(), step(), and state
+    methods as HTTP endpoints compatible with HTTPEnvClient.
+    The server expects:
+    - Action deserialization: Converts JSON dict to Action subclass
+    - Observation serialization: Converts Observation subclass to JSON dict
+    Example:
+        >>> from core.env_server import HTTPEnvServer
+        >>> from envs.coding_env.server import CodeExecutionEnvironment
+        >>> from envs.coding_env.models import CodeAction, CodeObservation
+        >>>
+        >>> env = CodeExecutionEnvironment()
+        >>> server = HTTPEnvServer(env, CodeAction, CodeObservation)
+        >>>
+        >>> # Register routes with FastAPI
+        >>> from fastapi import FastAPI
+        >>> app = FastAPI()
+        >>> server.register_routes(app)
+    """
+    def __init__(
+        self,
+        env: Environment,
+        action_cls: Type[Action],
+        observation_cls: Type[Observation],
+    ):
+        """
+        Initialize HTTP server wrapper.
+        Args:
+            env: The Environment instance to wrap
+            action_cls: The Action subclass this environment expects
+            observation_cls: The Observation subclass this environment returns
+        """
+        self.env = env
+        self.action_cls = action_cls
+        self.observation_cls = observation_cls
+    def register_routes(self, app: Any) -> None:
+        """
+        Register HTTP routes on a FastAPI application.
+        Adds POST /reset, POST /step, GET /state and GET /health.
+        Args:
+            app: FastAPI application instance
+        Raises:
+            TypeError: If app is not a FastAPI instance
+        """
+        if not isinstance(app, FastAPI):
+            raise TypeError("app must be a FastAPI instance")
+        @app.post("/reset")
+        async def reset(request: Dict[str, Any] = Body(default={})) -> Dict[str, Any]:
+            """Reset endpoint - returns initial observation."""
+            # TODO: Handle seed, episode_id from request if provided
+            observation = self.env.reset()
+            return self._serialize_observation(observation)
+        @app.post("/step")
+        async def step(request: Dict[str, Any]) -> Dict[str, Any]:
+            """Step endpoint - executes action and returns observation."""
+            # A request without an "action" key is treated as an empty action dict.
+            action_data = request.get("action", {})
+            # TODO: Handle timeout_s, request_id, episode_id from request if provided
+            # Deserialize action
+            action = self._deserialize_action(action_data)
+            # Execute step
+            observation = self.env.step(action)
+            # Return serialized observation
+            return self._serialize_observation(observation)
+        @app.get("/state")
+        async def get_state() -> Dict[str, Any]:
+            """State endpoint - returns current environment state."""
+            state = self.env.state
+            # asdict() requires the state object to be a dataclass instance.
+            return asdict(state)
+        @app.get("/health")
+        async def health() -> Dict[str, str]:
+            """Health check endpoint."""
+            return {"status": "healthy"}
+    def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
+        """
+        Convert JSON dict to Action instance.
+        The input dictionary is not modified; a shallow copy is used so the
+        caller's request payload stays intact.
+        Args:
+            action_data: Dictionary containing action data
+        Returns:
+            Action instance
+        Note:
+            This is a simple implementation. Subclasses may need to override
+            for more complex deserialization logic.
+        """
+        # Work on a copy: popping from the caller's dict would silently strip
+        # "metadata" from the original request payload.
+        fields = dict(action_data)
+        # Remove metadata if present (it will be set via kw_only field)
+        metadata = fields.pop("metadata", {})
+        action = self.action_cls(**fields)
+        action.metadata = metadata
+        return action
+    def _serialize_observation(self, observation: Observation) -> Dict[str, Any]:
+        """
+        Convert Observation instance to JSON-compatible dict.
+        Args:
+            observation: Observation instance
+        Returns:
+            Dictionary compatible with HTTPEnvClient._parse_result()
+        The format matches what HTTPEnvClient expects:
+        {
+            "observation": {...},  # Observation fields
+            "reward": float | None,
+            "done": bool,
+        }
+        """
+        obs_dict = asdict(observation)
+        # Extract reward and done (these are part of StepResult on client side)
+        reward = obs_dict.pop("reward", None)
+        done = obs_dict.pop("done", False)
+        obs_dict.pop("metadata", None)  # Remove metadata from observation
+        # Return in HTTPEnvClient expected format
+        return {
+            "observation": obs_dict,
+            "reward": reward,
+            "done": done,
+        }
+def create_app(
+    env: Environment,
+    action_cls: Type[Action],
+    observation_cls: Type[Observation],
+) -> Any:
+    """
+    Build the FastAPI application, optionally with the web interface attached.
+    The ENABLE_WEB_INTERFACE environment variable selects the variant: the
+    values "true", "1" and "yes" (compared case-insensitively) turn the web
+    interface on; any other value, or an unset variable, leaves it off.
+    Args:
+        env: The Environment instance to serve
+        action_cls: The Action subclass this environment expects
+        observation_cls: The Observation subclass this environment returns
+    Returns:
+        FastAPI application instance, with or without the web interface
+    """
+    flag = os.getenv("ENABLE_WEB_INTERFACE", "false").lower()
+    if flag not in ("true", "1", "yes"):
+        # Web interface not requested: plain HTTP API only.
+        return create_fastapi_app(env, action_cls, observation_cls)
+    # Deferred import: the web interface module is loaded only when requested.
+    from .web_interface import create_web_interface_app
+    return create_web_interface_app(env, action_cls, observation_cls)
+def create_fastapi_app(
+    env: Environment,
+    action_cls: Type[Action],
+    observation_cls: Type[Observation],
+) -> Any:
+    """
+    Create a FastAPI application with routes for the given environment.
+    Args:
+        env: The Environment instance to serve
+        action_cls: The Action subclass this environment expects
+        observation_cls: The Observation subclass this environment returns
+    Returns:
+        FastAPI application instance with routes registered
+    Raises:
+        ImportError: If FastAPI cannot be imported
+    Example:
+        >>> from envs.coding_env.server import CodeExecutionEnvironment
+        >>> from envs.coding_env.models import CodeAction, CodeObservation
+        >>>
+        >>> env = CodeExecutionEnvironment()
+        >>> app = create_fastapi_app(env, CodeAction, CodeObservation)
+        >>>
+        >>> # Run with: uvicorn module:app --host 0.0.0.0 --port 8000
+    """
+    try:
+        from fastapi import FastAPI
+    except ImportError as exc:
+        # Chain explicitly so the original import failure stays attached as
+        # the cause of the friendlier error.
+        raise ImportError(
+            "FastAPI is required. Install with: pip install fastapi uvicorn"
+        ) from exc
+    app = FastAPI(title="Environment HTTP Server")
+    server = HTTPEnvServer(env, action_cls, observation_cls)
+    server.register_routes(app)
+    return app

src/core/env_server/interfaces.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from abc import ABC, abstractmethod
+from typing import Any, Protocol, TypedDict
+from .types import Action, Observation, State
+class Message(TypedDict):
+    """A message in a conversation.
+    Compatible with Huggingface chat template format.
+    """
+    # Speaker of the message (presumably values like "user" or "assistant";
+    # nothing here restricts the allowed strings).
+    role: str
+    # Text body of the message.
+    content: str
+class ModelTokenizer(Protocol):
+    """Protocol for tokenizers that support chat templates.
+    This protocol defines the interface that tokenizers must implement
+    to work with chat-based environments. It's compatible with
+    Huggingface transformers tokenizers.
+    Being a typing.Protocol, it is satisfied structurally: any object that
+    provides these methods matches, without inheriting from this class.
+    """
+    def apply_chat_template(
+        self,
+        conversation: list[Message],
+        tokenize: bool = True,
+        return_tensors: str | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Apply a chat template to format and optionally tokenize a conversation.
+        Args:
+            conversation: List of message dictionaries with 'role' and 'content'
+            tokenize: Whether to tokenize the output
+            return_tensors: Format for returned tensors ('pt' for PyTorch)
+            **kwargs: Additional arguments
+        Returns:
+            Formatted and optionally tokenized conversation
+        """
+        # Protocol stub: signature only, no implementation.
+        ...
+    def decode(
+        self, token_ids: Any, skip_special_tokens: bool = False, **kwargs: Any
+    ) -> str:
+        """Decode token IDs back to text.
+        Args:
+            token_ids: Token IDs to decode
+            skip_special_tokens: Whether to skip special tokens in output
+            **kwargs: Additional arguments
+        Returns:
+            Decoded text string
+        """
+        # Protocol stub: signature only, no implementation.
+        ...
+class Transform(ABC):
+    """Transform observations to add rewards, metrics, or other modifications.
+    Transforms follow the TorchRL pattern where they take an observation
+    and return a (potentially modified) observation. This allows for
+    flexible reward computation and observation augmentation.
+    Concrete subclasses must implement __call__.
+    """
+    @abstractmethod
+    def __call__(self, observation: Observation) -> Observation:
+        """Transform an observation.
+        Args:
+            observation: The input observation
+        Returns:
+            The transformed observation
+        """
+        # Abstract: the base class provides no behavior.
+        pass
+class Environment(ABC):
+    """Abstract base for environment servers, following the Gym/Gymnasium API.
+    Subclasses supply reset(), step() and the state property.
+    Args:
+        transform: Optional transform; when given, _apply_transform() passes
+            observations through it
+    """
+    def __init__(self, transform: Transform | None = None):
+        self.transform = transform
+    @abstractmethod
+    def reset(self) -> Observation:
+        """Reset the environment and return the initial observation."""
+        ...
+    @abstractmethod
+    def step(self, action: Action) -> Observation:
+        """Advance the environment by one step using the given action."""
+        ...
+    @property
+    @abstractmethod
+    def state(self) -> State:
+        """Current environment state."""
+        ...
+    def _apply_transform(self, observation: Observation) -> Observation:
+        """Return the observation, run through the transform when one is set."""
+        transform = self.transform
+        if transform is None:
+            # No transform configured: hand the observation back untouched.
+            return observation
+        return transform(observation)

src/core/env_server/types.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+# Type aliases
+Scalar = Union[int, float, bool]
+@dataclass(kw_only=True)
+class Action:
+    """Base class for all environment actions.
+    All fields are keyword-only (kw_only=True).
+    """
+    # Free-form extra data attached to the action; each instance gets its own
+    # empty dict by default.
+    metadata: Dict[str, Any] = field(default_factory=dict)
+@dataclass(kw_only=True)
+class Observation:
+    """Base class for all environment observations.
+    All fields are keyword-only (kw_only=True).
+    """
+    # Flag marking the observation as done (presumably: the episode has ended).
+    done: bool = False
+    # Reward carried by this observation; None when no reward is set.
+    reward: Union[bool, int, float, None] = None
+    # Free-form extra data; each instance gets its own empty dict by default.
+    metadata: Dict[str, Any] = field(default_factory=dict)
+@dataclass
+class State:
+    """Base class for environment state."""
+    # Identifier of the current episode; None when no id has been assigned.
+    episode_id: Optional[str] = None
+    # Step counter, starting at 0 (presumably steps taken in the current episode).
+    step_count: int = 0
+@dataclass
+class CodeExecResult:
+    """Result of code execution containing stdout, stderr, and exit code."""
+    # Captured standard output of the executed code.
+    stdout: str
+    # Captured standard error of the executed code.
+    stderr: str
+    # Exit code of the execution (presumably 0 means success; confirm with the producer).
+    exit_code: int

src/core/env_server/web_interface.py ADDED Viewed

	@@ -0,0 +1,764 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Web interface for OpenEnv environments.
+This module provides a web-based interface for interacting with OpenEnv environments,
+including a two-pane layout for HumanAgent interaction and state observation.
+"""
+from __future__ import annotations
+import json
+import time
+from dataclasses import asdict, dataclass
+from typing import Any, Dict, List, Optional, Type
+from datetime import datetime
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
+from fastapi.responses import HTMLResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from .interfaces import Environment
+from .types import Action, Observation, State
+@dataclass
+class ActionLog:
+    """Log entry for an action taken."""
+    # Time the entry was created, as a string (WebInterfaceManager fills it
+    # with datetime.now().isoformat()).
+    timestamp: str
+    # The action, converted to a plain dict.
+    action: Dict[str, Any]
+    # The observation returned for the action, converted to a plain dict.
+    observation: Dict[str, Any]
+    # Reward reported by the observation; None when no reward was set.
+    reward: Optional[float]
+    # Done flag reported by the observation.
+    done: bool
+    # Environment step count recorded with this entry.
+    step_count: int
+@dataclass
+class EpisodeState:
+    """Current episode state for the web interface."""
+    # Identifier of the current episode, or None when there is none yet.
+    episode_id: Optional[str]
+    # Step count mirrored from the environment state.
+    step_count: int
+    # Most recent observation as a plain dict; None before the first one.
+    current_observation: Optional[Dict[str, Any]]
+    # Log entries for the actions taken so far.
+    action_logs: List[ActionLog]
+    # True by default; WebInterfaceManager sets it to True on reset and to
+    # False after a step.
+    is_reset: bool = True
+class WebInterfaceManager:
+    """Manages the web interface for an environment.
+    Keeps an EpisodeState mirror of the wrapped environment and broadcasts it
+    as JSON to every connected WebSocket client after each reset or step.
+    """
+    def __init__(
+        self,
+        env: Environment,
+        action_cls: Type[Action],
+        observation_cls: Type[Observation],
+    ):
+        """
+        Args:
+            env: The Environment instance to drive
+            action_cls: The Action subclass this environment expects
+            observation_cls: The Observation subclass this environment returns
+        """
+        self.env = env
+        self.action_cls = action_cls
+        self.observation_cls = observation_cls
+        self.episode_state = EpisodeState(
+            episode_id=None,
+            step_count=0,
+            current_observation=None,
+            action_logs=[]
+        )
+        self.connected_clients: List[WebSocket] = []
+    async def connect_websocket(self, websocket: WebSocket):
+        """Accept a new WebSocket client and register it.
+        The current state is then broadcast to all connected clients,
+        including the new one.
+        """
+        await websocket.accept()
+        self.connected_clients.append(websocket)
+        # Send current state to the new client
+        await self._send_state_update()
+    async def disconnect_websocket(self, websocket: WebSocket):
+        """Unregister a WebSocket client; a no-op if it is not registered."""
+        if websocket in self.connected_clients:
+            self.connected_clients.remove(websocket)
+    async def _send_state_update(self):
+        """Send current state to all connected clients.
+        Delivery is best effort: a client whose send fails is dropped from
+        the registry and the failure is not propagated.
+        """
+        if not self.connected_clients:
+            return
+        # Serialize once; the payload is identical for every client.
+        payload = json.dumps({
+            "type": "state_update",
+            "episode_state": asdict(self.episode_state)
+        })
+        # Iterate over a snapshot: each await yields control, and other
+        # coroutines may add or remove clients on the live list meanwhile.
+        failed_clients = []
+        for client in list(self.connected_clients):
+            try:
+                await client.send_text(payload)
+            except Exception:
+                # Exception (not a bare except) so task cancellation and
+                # interpreter-exit signals are not swallowed.
+                failed_clients.append(client)
+        # Remove disconnected clients; guard against one that was already
+        # unregistered while the sends were in flight.
+        for client in failed_clients:
+            if client in self.connected_clients:
+                self.connected_clients.remove(client)
+    async def reset_environment(self) -> Dict[str, Any]:
+        """Reset the environment and update state.
+        Returns:
+            Dict with "observation" (all observation fields), "reward" and "done"
+        """
+        observation = self.env.reset()
+        state = self.env.state
+        # Update episode state
+        self.episode_state.episode_id = state.episode_id
+        self.episode_state.step_count = 0
+        self.episode_state.current_observation = asdict(observation)
+        self.episode_state.action_logs = []
+        self.episode_state.is_reset = True
+        # Send state update
+        await self._send_state_update()
+        return {
+            "observation": asdict(observation),
+            "reward": observation.reward,
+            "done": observation.done,
+        }
+    async def step_environment(self, action_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Execute a step in the environment and update state.
+        Args:
+            action_data: Dictionary of action fields (not modified)
+        Returns:
+            Dict with "observation" (all observation fields), "reward" and "done"
+        """
+        # Deserialize action
+        action = self._deserialize_action(action_data)
+        # Execute step
+        observation = self.env.step(action)
+        state = self.env.state
+        # Create action log
+        action_log = ActionLog(
+            timestamp=datetime.now().isoformat(),
+            action=asdict(action),
+            observation=asdict(observation),
+            reward=observation.reward,
+            done=observation.done,
+            step_count=state.step_count
+        )
+        # Update episode state
+        self.episode_state.episode_id = state.episode_id
+        self.episode_state.step_count = state.step_count
+        self.episode_state.current_observation = asdict(observation)
+        self.episode_state.action_logs.append(action_log)
+        self.episode_state.is_reset = False
+        # Send state update
+        await self._send_state_update()
+        return {
+            "observation": asdict(observation),
+            "reward": observation.reward,
+            "done": observation.done,
+        }
+    def get_state(self) -> Dict[str, Any]:
+        """Get current environment state as a plain dict."""
+        state = self.env.state
+        return asdict(state)
+    def _deserialize_action(self, action_data: Dict[str, Any]) -> Action:
+        """Convert JSON dict to Action instance.
+        The input dictionary is left untouched; a shallow copy is consumed.
+        """
+        # Copy first: popping from the caller's dict would strip "metadata"
+        # from the original request payload.
+        fields = dict(action_data)
+        metadata = fields.pop("metadata", {})
+        action = self.action_cls(**fields)
+        action.metadata = metadata
+        return action
+def create_web_interface_app(
+    env: Environment,
+    action_cls: Type[Action],
+    observation_cls: Type[Observation],
+) -> FastAPI:
+    """
+    Create a FastAPI application with web interface for the given environment.
+    The app contains the base environment routes from create_fastapi_app()
+    plus GET /web, WebSocket /ws, POST /web/reset, POST /web/step and
+    GET /web/state.
+    Args:
+        env: The Environment instance to serve
+        action_cls: The Action subclass this environment expects
+        observation_cls: The Observation subclass this environment returns
+    Returns:
+        FastAPI application instance with web interface
+    """
+    from .http_server import create_fastapi_app
+    # Create the base environment app
+    app = create_fastapi_app(env, action_cls, observation_cls)
+    # Create web interface manager
+    web_manager = WebInterfaceManager(env, action_cls, observation_cls)
+    # Add web interface routes
+    @app.get("/web", response_class=HTMLResponse)
+    async def web_interface():
+        """Serve the web interface."""
+        return get_web_interface_html(action_cls)
+    @app.websocket("/ws")
+    async def websocket_endpoint(websocket: WebSocket):
+        """WebSocket endpoint for real-time updates."""
+        await web_manager.connect_websocket(websocket)
+        try:
+            while True:
+                # Keep connection alive; incoming text is read and discarded.
+                await websocket.receive_text()
+        except WebSocketDisconnect:
+            # Normal client departure; cleanup happens in the finally clause.
+            pass
+        finally:
+            # Always unregister, so a client is not left in the registry when
+            # the loop ends with an error other than WebSocketDisconnect.
+            await web_manager.disconnect_websocket(websocket)
+    @app.post("/web/reset")
+    async def web_reset():
+        """Reset endpoint for web interface."""
+        return await web_manager.reset_environment()
+    @app.post("/web/step")
+    async def web_step(request: Dict[str, Any]):
+        """Step endpoint for web interface."""
+        # A request without an "action" key is treated as an empty action dict.
+        action_data = request.get("action", {})
+        return await web_manager.step_environment(action_data)
+    @app.get("/web/state")
+    async def web_state():
+        """State endpoint for web interface."""
+        return web_manager.get_state()
+    return app
+def get_web_interface_html(action_cls: Type[Action]) -> str:
+    """Generate the HTML for the web interface."""
+    # Get action fields for dynamic form generation
+    action_fields = []
+    if hasattr(action_cls, '__dataclass_fields__'):
+        for field_name, field_info in action_cls.__dataclass_fields__.items():
+            if field_name != 'metadata':
+                field_type = field_info.type
+                if field_type == str:
+                    input_type = "text"
+                elif field_type == int:
+                    input_type = "number"
+                elif field_type == float:
+                    input_type = "number"
+                elif field_type == bool:
+                    input_type = "checkbox"
+                else:
+                    input_type = "text"
+                action_fields.append({
+                    'name': field_name,
+                    'type': input_type,
+                    'required': field_info.default is field_info.default_factory
+                })
+    return f"""
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>OpenEnv Web Interface</title>
+    <style>
+        * {{
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }}
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            background-color: #f5f5f5;
+            height: 100vh;
+            overflow: hidden;
+        }}
+        .container {{
+            display: flex;
+            height: 100vh;
+        }}
+        .left-pane {{
+            width: 50%;
+            background: white;
+            border-right: 1px solid #e0e0e0;
+            display: flex;
+            flex-direction: column;
+        }}
+        .right-pane {{
+            width: 50%;
+            background: #fafafa;
+            display: flex;
+            flex-direction: column;
+        }}
+        .pane-header {{
+            padding: 20px;
+            border-bottom: 1px solid #e0e0e0;
+            background: #f8f9fa;
+            font-weight: 600;
+            font-size: 16px;
+        }}
+        .pane-content {{
+            flex: 1;
+            padding: 20px;
+            overflow-y: auto;
+        }}
+        .action-form {{
+            background: white;
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 20px;
+            margin-bottom: 20px;
+        }}
+        .form-group {{
+            margin-bottom: 15px;
+        }}
+        .form-group label {{
+            display: block;
+            margin-bottom: 5px;
+            font-weight: 500;
+            color: #333;
+        }}
+        .form-group input, .form-group textarea {{
+            width: 100%;
+            padding: 8px 12px;
+            border: 1px solid #ddd;
+            border-radius: 4px;
+            font-size: 14px;
+        }}
+        .form-group input:focus, .form-group textarea:focus {{
+            outline: none;
+            border-color: #007bff;
+            box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
+        }}
+        .btn {{
+            background: #007bff;
+            color: white;
+            border: none;
+            padding: 10px 20px;
+            border-radius: 4px;
+            cursor: pointer;
+            font-size: 14px;
+            margin-right: 10px;
+            margin-bottom: 10px;
+        }}
+        .btn:hover {{
+            background: #0056b3;
+        }}
+        .btn:disabled {{
+            background: #6c757d;
+            cursor: not-allowed;
+        }}
+        .btn-secondary {{
+            background: #6c757d;
+        }}
+        .btn-secondary:hover {{
+            background: #545b62;
+        }}
+        .state-display {{
+            background: white;
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 15px;
+            margin-bottom: 20px;
+        }}
+        .state-item {{
+            margin-bottom: 8px;
+        }}
+        .state-label {{
+            font-weight: 500;
+            color: #666;
+        }}
+        .state-value {{
+            color: #333;
+            font-family: monospace;
+        }}
+        .logs-container {{
+            background: white;
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 15px;
+            max-height: 400px;
+            overflow-y: auto;
+        }}
+        .log-entry {{
+            border-bottom: 1px solid #f0f0f0;
+            padding: 10px 0;
+        }}
+        .log-entry:last-child {{
+            border-bottom: none;
+        }}
+        .log-timestamp {{
+            font-size: 12px;
+            color: #666;
+            margin-bottom: 5px;
+        }}
+        .log-action {{
+            background: #e3f2fd;
+            padding: 8px;
+            border-radius: 4px;
+            margin-bottom: 5px;
+            font-family: monospace;
+            font-size: 12px;
+        }}
+        .log-observation {{
+            background: #f3e5f5;
+            padding: 8px;
+            border-radius: 4px;
+            font-family: monospace;
+            font-size: 12px;
+        }}
+        .log-reward {{
+            font-weight: 600;
+            color: #28a745;
+        }}
+        .log-done {{
+            font-weight: 600;
+            color: #dc3545;
+        }}
+        .status-indicator {{
+            display: inline-block;
+            width: 8px;
+            height: 8px;
+            border-radius: 50%;
+            margin-right: 8px;
+        }}
+        .status-connected {{
+            background: #28a745;
+        }}
+        .status-disconnected {{
+            background: #dc3545;
+        }}
+        .json-display {{
+            background: #f8f9fa;
+            border: 1px solid #e9ecef;
+            border-radius: 4px;
+            padding: 10px;
+            font-family: monospace;
+            font-size: 12px;
+            white-space: pre-wrap;
+            max-height: 200px;
+            overflow-y: auto;
+        }}
+    </style>
+</head>
+<body>
+    <div class="container">
+        <!-- Left Pane: HumanAgent Interface -->
+        <div class="left-pane">
+            <div class="pane-header">
+                <span class="status-indicator status-disconnected" id="connection-status"></span>
+                HumanAgent Interface
+            </div>
+            <div class="pane-content">
+                <!-- Action Form -->
+                <div class="action-form">
+                    <h3>Take Action</h3>
+                    <form id="action-form">
+                        {_generate_action_form_fields(action_fields)}
+                        <button type="submit" class="btn" id="step-btn">Step</button>
+                    </form>
+                </div>
+                <!-- Control Buttons -->
+                <div style="margin-bottom: 20px;">
+                    <button class="btn btn-secondary" id="reset-btn">Reset Environment</button>
+                    <button class="btn btn-secondary" id="state-btn">Get State</button>
+                </div>
+                <!-- Current State Display -->
+                <div class="state-display">
+                    <h3>Current State</h3>
+                    <div id="current-state">
+                        <div class="state-item">
+                            <span class="state-label">Status:</span>
+                            <span class="state-value" id="env-status">Not initialized</span>
+                        </div>
+                        <div class="state-item">
+                            <span class="state-label">Episode ID:</span>
+                            <span class="state-value" id="episode-id">-</span>
+                        </div>
+                        <div class="state-item">
+                            <span class="state-label">Step Count:</span>
+                            <span class="state-value" id="step-count">0</span>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <!-- Right Pane: State Observer -->
+        <div class="right-pane">
+            <div class="pane-header">
+                State Observer
+            </div>
+            <div class="pane-content">
+                <!-- Current Observation -->
+                <div class="state-display">
+                    <h3>Current Observation</h3>
+                    <div id="current-observation" class="json-display">
+                        No observation yet
+                    </div>
+                </div>
+                <!-- Action Logs -->
+                <div class="logs-container">
+                    <h3>Action History</h3>
+                    <div id="action-logs">
+                        No actions taken yet
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+    <script>
+        class OpenEnvWebInterface {{
+            constructor() {{
+                this.ws = null;
+                this.isConnected = false;
+                this.init();
+            }}
+            init() {{
+                this.connectWebSocket();
+                this.setupEventListeners();
+            }}
+            connectWebSocket() {{
+                const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
+                const wsUrl = `${{protocol}}//${{window.location.host}}/ws`;
+                this.ws = new WebSocket(wsUrl);
+                this.ws.onopen = () => {{
+                    this.isConnected = true;
+                    this.updateConnectionStatus(true);
+                    console.log('WebSocket connected');
+                }};
+                this.ws.onmessage = (event) => {{
+                    const data = JSON.parse(event.data);
+                    if (data.type === 'state_update') {{
+                        this.updateUI(data.episode_state);
+                    }}
+                }};
+                this.ws.onclose = () => {{
+                    this.isConnected = false;
+                    this.updateConnectionStatus(false);
+                    console.log('WebSocket disconnected');
+                    // Attempt to reconnect after 3 seconds
+                    setTimeout(() => this.connectWebSocket(), 3000);
+                }};
+                this.ws.onerror = (error) => {{
+                    console.error('WebSocket error:', error);
+                }};
+            }}
+            setupEventListeners() {{
+                // Action form submission
+                document.getElementById('action-form').addEventListener('submit', (e) => {{
+                    e.preventDefault();
+                    this.submitAction();
+                }});
+                // Reset button
+                document.getElementById('reset-btn').addEventListener('click', () => {{
+                    this.resetEnvironment();
+                }});
+                // State button
+                document.getElementById('state-btn').addEventListener('click', () => {{
+                    this.getState();
+                }});
+            }}
+            async submitAction() {{
+                const formData = new FormData(document.getElementById('action-form'));
+                const action = {{}};
+                // Collect form data
+                for (const [key, value] of formData.entries()) {{
+                    if (value !== '') {{
+                        action[key] = value;
+                    }}
+                }}
+                try {{
+                    const response = await fetch('/web/step', {{
+                        method: 'POST',
+                        headers: {{ 'Content-Type': 'application/json' }},
+                        body: JSON.stringify({{ action }})
+                    }});
+                    if (!response.ok) {{
+                        throw new Error(`HTTP error! status: ${{response.status}}`);
+                    }}
+                    const result = await response.json();
+                    console.log('Step result:', result);
+                }} catch (error) {{
+                    console.error('Error submitting action:', error);
+                    alert('Error submitting action: ' + error.message);
+                }}
+            }}
+            async resetEnvironment() {{
+                try {{
+                    const response = await fetch('/web/reset', {{
+                        method: 'POST',
+                        headers: {{ 'Content-Type': 'application/json' }}
+                    }});
+                    if (!response.ok) {{
+                        throw new Error(`HTTP error! status: ${{response.status}}`);
+                    }}
+                    const result = await response.json();
+                    console.log('Reset result:', result);
+                }} catch (error) {{
+                    console.error('Error resetting environment:', error);
+                    alert('Error resetting environment: ' + error.message);
+                }}
+            }}
+            async getState() {{
+                try {{
+                    const response = await fetch('/web/state');
+                    const state = await response.json();
+                    console.log('Current state:', state);
+                    alert('Current state: ' + JSON.stringify(state, null, 2));
+                }} catch (error) {{
+                    console.error('Error getting state:', error);
+                    alert('Error getting state: ' + error.message);
+                }}
+            }}
+            updateConnectionStatus(connected) {{
+                const indicator = document.getElementById('connection-status');
+                if (connected) {{
+                    indicator.className = 'status-indicator status-connected';
+                }} else {{
+                    indicator.className = 'status-indicator status-disconnected';
+                }}
+            }}
+            updateUI(episodeState) {{
+                // Update current state
+                document.getElementById('env-status').textContent =
+                    episodeState.is_reset ? 'Reset' : 'Running';
+                document.getElementById('episode-id').textContent =
+                    episodeState.episode_id || '-';
+                document.getElementById('step-count').textContent =
+                    episodeState.step_count.toString();
+                // Update current observation
+                const observationDiv = document.getElementById('current-observation');
+                if (episodeState.current_observation) {{
+                    observationDiv.textContent = JSON.stringify(
+                        episodeState.current_observation, null, 2
+                    );
+                }} else {{
+                    observationDiv.textContent = 'No observation yet';
+                }}
+                // Update action logs
+                const logsDiv = document.getElementById('action-logs');
+                if (episodeState.action_logs.length === 0) {{
+                    logsDiv.innerHTML = 'No actions taken yet';
+                }} else {{
+                    logsDiv.innerHTML = episodeState.action_logs.map(log => `
+                        <div class="log-entry">
+                            <div class="log-timestamp">${{log.timestamp}} (Step ${{log.step_count}})</div>
+                            <div class="log-action">Action: ${{JSON.stringify(log.action, null, 2)}}</div>
+                            <div class="log-observation">Observation: ${{JSON.stringify(log.observation, null, 2)}}</div>
+                            <div>
+                                <span class="log-reward">Reward: ${{log.reward !== null ? log.reward : 'None'}}</span>
+                                ${{log.done ? '<span class="log-done">DONE</span>' : ''}}
+                            </div>
+                        </div>
+                    `).join('');
+                }}
+            }}
+        }}
+        // Initialize the web interface when the page loads
+        document.addEventListener('DOMContentLoaded', () => {{
+            new OpenEnvWebInterface();
+        }});
+    </script>
+</body>
+</html>
+    """.replace('{_generate_action_form_fields(action_fields)}', _generate_action_form_fields(action_fields))
+def _generate_action_form_fields(action_fields: List[Dict[str, Any]]) -> str:
+    """Generate HTML form fields for action input."""
+    if not action_fields:
+        return '<p>No action fields available</p>'
+    fields_html = []
+    for field in action_fields:
+        if field['type'] == 'checkbox':
+            fields_html.append(f'''
+                <div class="form-group">
+                    <label>
+                        <input type="checkbox" name="{field['name']}" value="true">
+                        {field['name']}
+                    </label>
+                </div>
+            ''')
+        elif field['type'] == 'text' and 'message' in field['name'].lower():
+            fields_html.append(f'''
+                <div class="form-group">
+                    <label for="{field['name']}">{field['name']}:</label>
+                    <textarea name="{field['name']}" id="{field['name']}" rows="3" placeholder="Enter {field['name']}..."></textarea>
+                </div>
+            ''')
+        else:
+            fields_html.append(f'''
+                <div class="form-group">
+                    <label for="{field['name']}">{field['name']}:</label>
+                    <input type="{field['type']}" name="{field['name']}" id="{field['name']}" placeholder="Enter {field['name']}..." {"required" if field['required'] else ""}>
+                </div>
+            ''')
+    return '\n'.join(fields_html)

src/core/http_env_client.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+core/http_env_client.py
+Minimal HTTP-based environment client.
+- Talks to a single env worker exposing: POST /reset, POST /step, GET /state
+Future hooks (commented below) for:
+- episode_id, seed on reset
+- request_id on step
+- custom headers (auth/trace)
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any, Dict, Generic, Optional, Type, TypeVar
+from .containers.runtime import LocalDockerProvider
+import requests
+from .types import StepResult
+if TYPE_CHECKING:
+    from .containers.runtime import ContainerProvider
+ActT = TypeVar("ActT")
+ObsT = TypeVar("ObsT")
+EnvClientT = TypeVar("EnvClientT", bound="HTTPEnvClient")
+class HTTPEnvClient(ABC, Generic[ActT, ObsT]):
+    def __init__(
+        self,
+        base_url: str,
+        request_timeout_s: float = 15.0,
+        default_headers: Optional[Dict[str, str]] = None,
+        provider: Optional["ContainerProvider"] = None,
+    ):
+        self._base = base_url.rstrip("/")
+        self._timeout = float(request_timeout_s)
+        self._http = requests.Session()
+        self._headers = default_headers or {}
+        self._provider = provider
+    @classmethod
+    def from_docker_image(
+        cls: Type[EnvClientT],
+        image: str,
+        provider: Optional["ContainerProvider"] = None,
+    ) -> EnvClientT:
+        """
+        Create an environment client by spinning up a Docker container locally.
+        This is a development utility that:
+        1. Starts a Docker container from the specified image
+        2. Waits for the server to be ready
+        3. Creates and returns a client instance connected to the container
+        Note: The container lifecycle management is left to the user or higher-level
+        orchestration. The container will keep running until manually stopped.
+        Args:
+            image: Docker image name to run (e.g., "echo-env:latest")
+            provider: Container provider to use (defaults to LocalDockerProvider)
+        Returns:
+            An instance of the client class connected to the running container
+        Example:
+            >>> from envs.coding_env.client import CodingEnv
+            >>> from envs.coding_env.models import CodeAction
+            >>>
+            >>> # Create environment from image
+            >>> env = CodingEnv.from_docker_image("coding-env:latest")
+            >>>
+            >>> # Use the environment
+            >>> result = env.reset()
+            >>> print(result.observation)
+            >>>
+            >>> step_result = env.step(CodeAction(code="print('hello')"))
+            >>> print(step_result.observation.stdout)
+            >>>
+            >>> # Cleanup (optional)
+            >>> env.close()
+        """
+        # Use default provider if none provided
+        if provider is None:
+            provider = LocalDockerProvider()
+        # 1. Start container
+        base_url = provider.start_container(image)
+        # 2. Wait for server to be ready
+        provider.wait_for_ready(base_url)
+        # 3. Create and return client instance with provider reference
+        return cls(base_url=base_url, provider=provider)
+    @abstractmethod
+    def _step_payload(self, action: ActT) -> dict:
+        """Convert an Action object to the JSON body expected by the env server."""
+        raise NotImplementedError
+    @abstractmethod
+    def _parse_result(self, payload: dict) -> StepResult[ObsT]:
+        """Convert a JSON response from the env server to StepResult[ObsT]."""
+        raise NotImplementedError
+    @abstractmethod
+    def _parse_state(self, payload: dict) -> Any:
+        """Convert a JSON response from the state endpoint to a State object."""
+        raise NotImplementedError
+    # ---------- Environment Server Interface Methods ----------
+    def reset(self) -> StepResult[ObsT]:
+        body: Dict[str, Any] = {}
+        # TODO: later:
+        # body["seed"] = seed
+        # body["episode_id"] = episode_id
+        r = self._http.post(
+            f"{self._base}/reset",
+            json=body,
+            headers=self._headers,
+            timeout=self._timeout,
+        )
+        r.raise_for_status()
+        return self._parse_result(r.json())
+    def step(self, action: ActT) -> StepResult[ObsT]:
+        body: Dict[str, Any] = {
+            "action": self._step_payload(action),
+            "timeout_s": int(self._timeout),
+        }
+        # TODO: later:
+        # body["request_id"] = str(uuid.uuid4())
+        # body["episode_id"] = current_episode_id
+        r = self._http.post(
+            f"{self._base}/step",
+            json=body,
+            headers=self._headers,
+            timeout=self._timeout,
+        )
+        r.raise_for_status()
+        return self._parse_result(r.json())
+    def state(self) -> Any:
+        """
+        Get the current environment state from the server.
+        Returns:
+            State object with environment state information (e.g., episode_id, step_count)
+        Example:
+            >>> client = EchoEnv.from_docker_image("echo-env:latest")
+            >>> result = client.reset()
+            >>> state = client.state()
+            >>> print(state.episode_id)
+            >>> print(state.step_count)
+        """
+        r = self._http.get(
+            f"{self._base}/state",
+            headers=self._headers,
+            timeout=self._timeout,
+        )
+        r.raise_for_status()
+        return self._parse_state(r.json())
+    def close(self) -> None:
+        """
+        Close the environment and clean up resources.
+        If this client was created via from_docker_image(), this will stop
+        and remove the associated container.
+        """
+        if self._provider is not None:
+            self._provider.stop_container()

src/core/tools/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Core tools for code execution and other utilities."""
+from .local_python_executor import PyExecutor
+__all__ = ["PyExecutor"]

src/core/tools/local_python_executor.py ADDED Viewed

	@@ -0,0 +1,105 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Local Python Executor.
+This module provides functionality for executing Python code locally by wrapping
+the smolagents LocalPythonExecutor.
+"""
+from smolagents import LocalPythonExecutor
+from core.env_server.types import CodeExecResult
+class PyExecutor:
+    """
+    Wrapper around smolagents LocalPythonExecutor for executing Python code.
+    This class provides a simple interface to execute Python code in a subprocess
+    and capture the results including stdout, stderr, and exit code.
+    Args:
+        additional_imports: List of additional module imports to authorize.
+                          For example: ["numpy", "pandas", "matplotlib"]
+                          These will be added to the base authorized imports.
+    Example:
+        >>> # Basic usage with default imports
+        >>> executor = PyExecutor()
+        >>> result = executor.run("print('Hello, World!')")
+        >>> print(result.stdout)  # "Hello, World!\n"
+        >>> print(result.exit_code)  # 0
+        >>>
+        >>> # Usage with additional imports
+        >>> executor = PyExecutor(additional_imports=["numpy", "pandas"])
+        >>> result = executor.run("import numpy as np\\nprint(np.array([1, 2, 3]))")
+        >>> print(result.stdout)  # "[1 2 3]\n"
+    """
+    def __init__(self, additional_imports: list[str] | None = None):
+        """
+        Initialize the PyExecutor with a LocalPythonExecutor instance.
+        Args:
+            additional_imports: List of additional module names to authorize for import.
+                              Defaults to an empty list if not provided.
+        """
+        if additional_imports is None:
+            additional_imports = []
+        self._executor = LocalPythonExecutor(
+            additional_authorized_imports=additional_imports
+        )
+        # Initialize tools to make BASE_PYTHON_TOOLS available (including print)
+        self._executor.send_tools({})
+    def run(self, code: str) -> CodeExecResult:
+        """
+        Execute Python code and return the result.
+        Args:
+            code: Python code string to execute
+        Returns:
+            CodeExecResult containing stdout, stderr, and exit_code
+        Example:
+            >>> executor = PyExecutor()
+            >>> result = executor.run("x = 5 + 3\\nprint(x)")
+            >>> print(result.stdout)  # "8\n"
+            >>> print(result.exit_code)  # 0
+            >>>
+            >>> # Error handling
+            >>> result = executor.run("1 / 0")
+            >>> print(result.exit_code)  # 1
+            >>> print(result.stderr)  # Contains error message
+        """
+        try:
+            # Execute the code using LocalPythonExecutor
+            # LocalPythonExecutor returns a CodeOutput object with output, logs, is_final_answer
+            exec_result = self._executor(code)
+            # Extract the logs (which contain print outputs) as stdout
+            # The output field contains the return value of the code
+            stdout = exec_result.logs
+            stderr = ""
+            exit_code = 0  # Success
+            return CodeExecResult(
+                stdout=stdout,
+                stderr=stderr,
+                exit_code=exit_code,
+            )
+        except Exception as e:
+            # LocalPythonExecutor raises InterpreterError for various issues
+            # (syntax errors, forbidden operations, runtime errors, etc.)
+            return CodeExecResult(
+                stdout="",
+                stderr=str(e),
+                exit_code=1,  # Non-zero indicates error
+            )

src/core/types.py ADDED Viewed

	@@ -0,0 +1,22 @@

+# Type definitions for EnvTorch
+from dataclasses import dataclass
+from typing import Any, Generic, Optional, TypeVar
+# Generic type for observations
+ObsT = TypeVar("ObsT")  # TypeVar for typehinting in IDEs
+@dataclass
+class StepResult(Generic[ObsT]):
+    """
+    Represents the result of one environment step.
+    The class is generic over the observation type ``ObsT``; only
+    ``observation`` is required when constructing an instance.
+    Attributes:
+        observation: The environment's observation after the action.
+        reward: Scalar reward for this step (optional).
+        done: Whether the episode is finished.
+    """
+    # Required: observation payload of type ObsT.
+    observation: ObsT
+    # None when no reward was supplied for this step.
+    reward: Optional[float] = None
+    # Defaults to False, i.e. the episode is not finished.
+    done: bool = False

src/envs/atari_env/README.md ADDED Viewed

	@@ -0,0 +1,383 @@

+# Atari Environment
+Integration of Atari 2600 games with the OpenEnv framework via the Arcade Learning Environment (ALE). ALE provides access to 100+ classic Atari games for RL research.
+## Supported Games
+ALE supports 100+ Atari 2600 games including:
+### Popular Games
+- **Pong** - Classic two-player tennis
+- **Breakout** - Break bricks with a ball
+- **Space Invaders** - Shoot descending aliens
+- **Pac-Man / Ms. Pac-Man** - Navigate mazes and eat pellets
+- **Asteroids** - Destroy asteroids in space
+- **Defender** - Side-scrolling space shooter
+- **Centipede** - Shoot segmented centipede
+- **Donkey Kong** - Jump over barrels to save princess
+- **Frogger** - Cross road and river safely
+- **Q*bert** - Jump on pyramid cubes
+And many more! For a complete list, see [ALE documentation](https://ale.farama.org/environments/complete_list/).
+## Architecture
+```
+┌────────────────────────────────────┐
+│ RL Training Code (Client)          │
+│   AtariEnv.step(action)            │
+└──────────────┬─────────────────────┘
+               │ HTTP
+┌──────────────▼─────────────────────┐
+│ FastAPI Server (Docker)            │
+│   AtariEnvironment                 │
+│     ├─ Wraps ALEInterface          │
+│     ├─ Handles observations        │
+│     └─ Action execution            │
+└────────────────────────────────────┘
+```
+## Installation & Usage
+### Option 1: Local Development (without Docker)
+**Requirements:**
+- Python 3.11+
+- ale-py installed: `pip install ale-py`
+```python
+from envs.atari_env import AtariEnv, AtariAction
+# Start local server manually
+# python -m envs.atari_env.server.app
+# Connect to local server
+env = AtariEnv(base_url="http://localhost:8000")
+# Reset environment
+result = env.reset()
+print(f"Screen shape: {result.observation.screen_shape}")
+print(f"Legal actions: {result.observation.legal_actions}")
+print(f"Lives: {result.observation.lives}")
+# Take actions
+for _ in range(10):
+    action_id = 2  # UP action
+    result = env.step(AtariAction(action_id=action_id, game_name="pong"))
+    print(f"Reward: {result.reward}, Done: {result.done}")
+    if result.done:
+        break
+# Cleanup
+env.close()
+```
+### Option 2: Docker (Recommended)
+**Build Atari image:**
+```bash
+cd OpenEnv
+# Build the image
+docker build \
+  -f src/envs/atari_env/server/Dockerfile \
+  -t atari-env:latest \
+  .
+```
+**Run specific games:**
+```bash
+# Pong (default)
+docker run -p 8000:8000 atari-env:latest
+# Breakout
+docker run -p 8000:8000 -e ATARI_GAME=breakout atari-env:latest
+# Space Invaders with grayscale observation
+docker run -p 8000:8000 \
+  -e ATARI_GAME=space_invaders \
+  -e ATARI_OBS_TYPE=grayscale \
+  atari-env:latest
+# Ms. Pac-Man with full action space
+docker run -p 8000:8000 \
+  -e ATARI_GAME=ms_pacman \
+  -e ATARI_FULL_ACTION_SPACE=true \
+  atari-env:latest
+```
+**Use with from_docker_image():**
+```python
+from envs.atari_env import AtariEnv, AtariAction
+import numpy as np
+# Automatically starts container
+env = AtariEnv.from_docker_image("atari-env:latest")
+result = env.reset()
+result = env.step(AtariAction(action_id=2))  # UP
+# Reshape screen for visualization
+screen = np.array(result.observation.screen).reshape(result.observation.screen_shape)
+print(f"Screen shape: {screen.shape}")  # (210, 160, 3) for RGB
+env.close()  # Stops container
+```
+## Observation Types
+### 1. RGB (Default)
+- **Shape**: [210, 160, 3]
+- **Description**: Full-color screen observation
+- **Usage**: Most realistic, good for vision-based learning
+```bash
+docker run -p 8000:8000 -e ATARI_OBS_TYPE=rgb atari-env:latest
+```
+### 2. Grayscale
+- **Shape**: [210, 160]
+- **Description**: Grayscale screen observation
+- **Usage**: Reduced dimensionality, faster processing
+```bash
+docker run -p 8000:8000 -e ATARI_OBS_TYPE=grayscale atari-env:latest
+```
+### 3. RAM
+- **Shape**: [128]
+- **Description**: Raw 128-byte Atari 2600 RAM contents
+- **Usage**: Compact representation, useful for specific research
+```bash
+docker run -p 8000:8000 -e ATARI_OBS_TYPE=ram atari-env:latest
+```
+## Action Spaces
+### Minimal Action Set (Default)
+Game-specific minimal actions (typically 4-9 actions).
+- Pong: 6 actions (NOOP, FIRE, UP, DOWN, etc.)
+- Breakout: 4 actions (NOOP, FIRE, LEFT, RIGHT)
+```bash
+docker run -p 8000:8000 -e ATARI_FULL_ACTION_SPACE=false atari-env:latest
+```
+### Full Action Set
+All 18 possible Atari 2600 actions:
+0. NOOP
+1. FIRE
+2. UP
+3. RIGHT
+4. LEFT
+5. DOWN
+6. UPRIGHT
+7. UPLEFT
+8. DOWNRIGHT
+9. DOWNLEFT
+10. UPFIRE
+11. RIGHTFIRE
+12. LEFTFIRE
+13. DOWNFIRE
+14. UPRIGHTFIRE
+15. UPLEFTFIRE
+16. DOWNRIGHTFIRE
+17. DOWNLEFTFIRE
+```bash
+docker run -p 8000:8000 -e ATARI_FULL_ACTION_SPACE=true atari-env:latest
+```
+## Configuration
+### Environment Variables
+- `ATARI_GAME`: Game name (default: "pong")
+- `ATARI_OBS_TYPE`: Observation type - "rgb", "grayscale", "ram" (default: "rgb")
+- `ATARI_FULL_ACTION_SPACE`: Use full action space - "true"/"false" (default: "false")
+- `ATARI_MODE`: Game mode (optional, game-specific)
+- `ATARI_DIFFICULTY`: Game difficulty (optional, game-specific)
+- `ATARI_REPEAT_ACTION_PROB`: Sticky action probability 0.0-1.0 (default: "0.0")
+- `ATARI_FRAMESKIP`: Frames to skip per action (default: "4")
+### Example: Breakout with Custom Settings
+```bash
+docker run -p 8000:8000 \
+  -e ATARI_GAME=breakout \
+  -e ATARI_OBS_TYPE=grayscale \
+  -e ATARI_FULL_ACTION_SPACE=true \
+  -e ATARI_REPEAT_ACTION_PROB=0.25 \
+  -e ATARI_FRAMESKIP=4 \
+  atari-env:latest
+```
+## API Reference
+### AtariAction
+```python
+@dataclass
+class AtariAction(Action):
+    action_id: int                  # Action index to execute
+    game_name: str = "pong"         # Game name
+    obs_type: str = "rgb"           # Observation type
+    full_action_space: bool = False # Full or minimal action space
+```
+### AtariObservation
+```python
+@dataclass
+class AtariObservation(Observation):
+    screen: List[int]               # Flattened screen pixels
+    screen_shape: List[int]         # Original screen shape
+    legal_actions: List[int]        # Legal action indices
+    lives: int                      # Lives remaining
+    episode_frame_number: int       # Frame # in episode
+    frame_number: int               # Total frame #
+    done: bool                      # Episode finished
+    reward: Optional[float]         # Reward from last action
+```
+### AtariState
+```python
+@dataclass
+class AtariState(State):
+    episode_id: str                      # Unique episode ID
+    step_count: int                      # Number of steps
+    game_name: str                       # Game name
+    obs_type: str                        # Observation type
+    full_action_space: bool              # Action space type
+    mode: Optional[int]                  # Game mode
+    difficulty: Optional[int]            # Game difficulty
+    repeat_action_probability: float     # Sticky action prob
+    frameskip: int                       # Frameskip setting
+```
+## Example Script
+```python
+#!/usr/bin/env python3
+"""Example training loop with Atari environment."""
+import numpy as np
+from envs.atari_env import AtariEnv, AtariAction
+# Start environment
+env = AtariEnv.from_docker_image("atari-env:latest")
+# Training loop
+for episode in range(10):
+    result = env.reset()
+    episode_reward = 0
+    steps = 0
+    while not result.done:
+        # Random policy (replace with your RL agent)
+        action_id = np.random.choice(result.observation.legal_actions)
+        # Take action
+        result = env.step(AtariAction(action_id=action_id))
+        episode_reward += result.reward or 0
+        steps += 1
+        # Reshape screen for processing
+        screen = np.array(result.observation.screen).reshape(
+            result.observation.screen_shape
+        )
+        # Your RL training code here
+        # ...
+    print(f"Episode {episode}: reward={episode_reward:.2f}, steps={steps}")
+env.close()
+```
+## Testing
+### Local Testing
+```bash
+# Install dependencies
+pip install ale-py fastapi uvicorn requests
+# Start server
+cd OpenEnv
+export PYTHONPATH="$(pwd)/src"
+python -m envs.atari_env.server.app
+# Test from another terminal
+python -c "
+from envs.atari_env import AtariEnv, AtariAction
+env = AtariEnv(base_url='http://localhost:8000')
+result = env.reset()
+print(f'Initial obs: {result.observation.screen_shape}')
+result = env.step(AtariAction(action_id=2))
+print(f'After step: reward={result.reward}, done={result.done}')
+env.close()
+"
+```
+### Docker Testing
+```bash
+# Build and run
+docker build -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
+docker run -p 8000:8000 atari-env:latest
+# Test in another terminal
+curl http://localhost:8000/health
+curl -X POST http://localhost:8000/reset
+```
+## Popular Games and Their Characteristics
+| Game | Minimal Actions | Lives | Difficulty | Notes |
+|------|----------------|-------|-----------|-------|
+| Pong | 6 | 1 | Low | Good for learning basics |
+| Breakout | 4 | 5 | Medium | Classic RL benchmark |
+| Space Invaders | 6 | 3 | Medium | Shooting game |
+| Ms. Pac-Man | 9 | 3 | High | Complex navigation |
+| Asteroids | 14 | 3 | Medium | Continuous shooting |
+| Montezuma's Revenge | 18 | 5 | Very High | Exploration challenge |
+| Pitfall | 18 | 1 | High | Platformer |
+| Seaquest | 18 | 3 | High | Submarine rescue |
+## Limitations & Notes
+- **Frame-perfect timing**: Some games require precise timing
+- **Exploration**: Games like Montezuma's Revenge are notoriously difficult
+- **Observation delay**: HTTP adds minimal latency vs local gym
+- **Determinism**: Set `ATARI_REPEAT_ACTION_PROB=0.0` for deterministic behavior
+- **ROMs**: All ROMs are bundled with the ale-py package
+## References
+- [Arcade Learning Environment Paper (2013)](https://jair.org/index.php/jair/article/view/10819)
+- [ALE GitHub](https://github.com/Farama-Foundation/Arcade-Learning-Environment)
+- [ALE Documentation](https://ale.farama.org/)
+- [Gymnasium Atari Environments](https://gymnasium.farama.org/environments/atari/)
+## Citation
+If you use ALE in your research, please cite:
+```bibtex
+@Article{bellemare13arcade,
+    author = {{Bellemare}, M.~G. and {Naddaf}, Y. and {Veness}, J. and {Bowling}, M.},
+    title = {The Arcade Learning Environment: An Evaluation Platform for General Agents},
+    journal = {Journal of Artificial Intelligence Research},
+    year = "2013",
+    month = "jun",
+    volume = "47",
+    pages = "253--279",
+}
+```

src/envs/atari_env/__init__.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Atari Environment for OpenEnv.
+This module provides OpenEnv integration for Atari 2600 games via the
+Arcade Learning Environment (ALE).
+Example:
+    >>> from envs.atari_env import AtariEnv, AtariAction
+    >>>
+    >>> # Connect to a running server or start via Docker
+    >>> env = AtariEnv.from_docker_image("atari-env:latest")
+    >>>
+    >>> # Reset and interact
+    >>> result = env.reset()
+    >>> result = env.step(AtariAction(action_id=2))  # UP
+    >>> print(result.reward, result.done)
+    >>>
+    >>> # Cleanup
+    >>> env.close()
+"""
+from .client import AtariEnv
+from .models import AtariAction, AtariObservation, AtariState
+__all__ = ["AtariEnv", "AtariAction", "AtariObservation", "AtariState"]

src/envs/atari_env/client.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Atari Environment HTTP Client.
+This module provides the client for connecting to an Atari Environment server
+over HTTP.
+"""
+from __future__ import annotations
+from typing import Any, Dict, TYPE_CHECKING
+from core.http_env_client import HTTPEnvClient
+from core.types import StepResult
+from .models import AtariAction, AtariObservation, AtariState
+if TYPE_CHECKING:
+    from core.containers.runtime import ContainerProvider
+class AtariEnv(HTTPEnvClient[AtariAction, AtariObservation]):
+    """
+    HTTP client for Atari Environment.
+    This client connects to an AtariEnvironment HTTP server and provides
+    methods to interact with it: reset(), step(), and state access.
+    Example:
+        >>> # Connect to a running server
+        >>> client = AtariEnv(base_url="http://localhost:8000")
+        >>> result = client.reset()
+        >>> print(result.observation.screen_shape)
+        >>>
+        >>> # Take an action
+        >>> result = client.step(AtariAction(action_id=2))  # UP
+        >>> print(result.reward, result.done)
+    Example with Docker:
+        >>> # Automatically start container and connect
+        >>> client = AtariEnv.from_docker_image("atari-env:latest")
+        >>> result = client.reset()
+        >>> result = client.step(AtariAction(action_id=0))  # NOOP
+    """
+    def _step_payload(self, action: AtariAction) -> Dict[str, Any]:
+        """
+        Convert AtariAction to JSON payload for step request.
+        Args:
+            action: AtariAction instance.
+        Returns:
+            Dictionary representation suitable for JSON encoding.
+        """
+        return {
+            "action_id": action.action_id,
+            "game_name": action.game_name,
+            "obs_type": action.obs_type,
+            "full_action_space": action.full_action_space,
+        }
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[AtariObservation]:
+        """
+        Parse server response into StepResult[AtariObservation].
+        Args:
+            payload: JSON response from server.
+        Returns:
+            StepResult with AtariObservation.
+        """
+        obs_data = payload.get("observation", {})
+        observation = AtariObservation(
+            screen=obs_data.get("screen", []),
+            screen_shape=obs_data.get("screen_shape", []),
+            legal_actions=obs_data.get("legal_actions", []),
+            lives=obs_data.get("lives", 0),
+            episode_frame_number=obs_data.get("episode_frame_number", 0),
+            frame_number=obs_data.get("frame_number", 0),
+            done=payload.get("done", False),
+            reward=payload.get("reward"),
+            metadata=obs_data.get("metadata", {}),
+        )
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict[str, Any]) -> AtariState:
+        """
+        Parse server response into AtariState object.
+        Args:
+            payload: JSON response from /state endpoint.
+        Returns:
+            AtariState object with environment state information.
+        """
+        return AtariState(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+            game_name=payload.get("game_name", "unknown"),
+            obs_type=payload.get("obs_type", "rgb"),
+            full_action_space=payload.get("full_action_space", False),
+            mode=payload.get("mode"),
+            difficulty=payload.get("difficulty"),
+            repeat_action_probability=payload.get("repeat_action_probability", 0.0),
+            frameskip=payload.get("frameskip", 4),
+        )

src/envs/atari_env/models.py ADDED Viewed

	@@ -0,0 +1,86 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data models for Atari Environment.
+This module defines the Action, Observation, and State types for Atari games
+via the Arcade Learning Environment (ALE).
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Literal, Optional
+from core.env_server import Action, Observation, State
+@dataclass
+class AtariAction(Action):
+    """
+    Action sent by a client to an Atari environment.
+    Only ``action_id`` is required; the remaining fields have defaults and are
+    serialized alongside it in the step request.
+    NOTE(review): the server-side ``AtariEnvironment.step`` in this package only
+    reads ``action_id``; the game itself is configured when the server starts,
+    so the other fields do not appear to switch games per step - confirm.
+    Attributes:
+        action_id: The integer action ID to take (from legal_actions).
+        game_name: Name of the Atari game (e.g., "pong", "breakout", "space_invaders").
+        obs_type: Observation type ("rgb", "grayscale", or "ram").
+        full_action_space: Whether to use full (18 actions) or minimal action space.
+    """
+    # Required: which entry of the observation's legal_actions to play.
+    action_id: int
+    game_name: str = "pong"
+    obs_type: Literal["rgb", "grayscale", "ram"] = "rgb"
+    full_action_space: bool = False
+@dataclass
+class AtariObservation(Observation):
+    """
+    Observation returned by the Atari environment after reset() or step().
+    Besides the fields declared here, callers in this package also pass
+    ``done``, ``reward`` and ``metadata`` when constructing an observation;
+    those are presumably declared on the ``Observation`` base class - verify
+    against core.env_server.
+    Attributes:
+        screen: Screen observation as a flattened list of pixels.
+                Shape depends on obs_type:
+                - rgb: [210, 160, 3] flattened
+                - grayscale: [210, 160] flattened
+                - ram: [128] (RAM contents)
+        screen_shape: Original shape of the screen before flattening.
+        legal_actions: List of legal action IDs the agent can take.
+        lives: Number of lives remaining.
+        episode_frame_number: Frame number within current episode.
+        frame_number: Total frame number since environment creation.
+    """
+    # Flattened payload; reshape with screen_shape to recover the original layout.
+    screen: List[int]
+    screen_shape: List[int]
+    legal_actions: List[int]
+    lives: int = 0
+    episode_frame_number: int = 0
+    frame_number: int = 0
+@dataclass
+class AtariState(State):
+    """
+    Configuration and progress snapshot of an Atari environment.
+    The fields declared here describe how the emulator was configured.
+    ``episode_id`` and ``step_count`` are also read and written on this object
+    elsewhere in the package; they are presumably declared on the ``State``
+    base class - verify against core.env_server.
+    Attributes:
+        game_name: Name of the Atari game.
+        obs_type: Observation type ("rgb", "grayscale", or "ram").
+        full_action_space: Whether using full or minimal action space.
+        mode: Game mode (if applicable).
+        difficulty: Game difficulty (if applicable).
+        repeat_action_probability: Probability of repeating previous action (sticky actions).
+        frameskip: Number of frames to skip per action.
+    """
+    game_name: str = "pong"
+    obs_type: Literal["rgb", "grayscale", "ram"] = "rgb"
+    full_action_space: bool = False
+    # None means "not specified" for mode and difficulty.
+    mode: Optional[int] = None
+    difficulty: Optional[int] = None
+    repeat_action_probability: float = 0.0
+    frameskip: int = 4

src/envs/atari_env/server/Dockerfile ADDED Viewed

	@@ -0,0 +1,43 @@

+# Dockerfile for Atari Environment
+# This image provides Atari 2600 games via the Arcade Learning Environment (ALE)
+# Configurable base image - defaults to local build, can be overridden for CI/CD
+# Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src
+#
+# Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile .
+#              docker build -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
+#
+# CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \
+#              -f src/envs/atari_env/server/Dockerfile -t atari-env:latest .
+ARG BASE_IMAGE=envtorch-base:latest
+FROM ${BASE_IMAGE}
+# Install ALE-specific dependencies
+# ale-py includes all Atari ROMs by default and requires gymnasium
+# The requirement specifiers MUST be quoted: RUN uses /bin/sh, where an unquoted
+# "pkg>=1.0" is parsed as "pkg" with stdout redirected to a file named "=1.0",
+# silently dropping the version constraint.
+RUN pip install --no-cache-dir \
+    "ale-py>=0.8.0" \
+    "gymnasium>=0.29.0" \
+    "numpy>=1.24.0"
+# Copy OpenEnv core (base image already set WORKDIR=/app)
+COPY src/core/ /app/src/core/
+# Copy Atari environment code
+COPY src/envs/atari_env/ /app/src/envs/atari_env/
+# Atari-specific environment variables (can be overridden at runtime)
+ENV ATARI_GAME=pong
+ENV ATARI_OBS_TYPE=rgb
+ENV ATARI_FULL_ACTION_SPACE=false
+ENV ATARI_REPEAT_ACTION_PROB=0.0
+ENV ATARI_FRAMESKIP=4
+# Expose port (documentation only; publish it with `docker run -p`)
+EXPOSE 8000
+# Health check: curl comes from the base image; -f turns HTTP errors into failures
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server (exec form so uvicorn receives stop signals directly)
+CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]

src/envs/atari_env/server/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Atari Environment Server.
+Server-side implementation of Atari environment for OpenEnv.
+"""
+from .atari_environment import AtariEnvironment
+__all__ = ["AtariEnvironment"]

src/envs/atari_env/server/app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+FastAPI application for the Atari Environment.
+This module creates an HTTP server that exposes Atari games
+over HTTP endpoints, making them compatible with HTTPEnvClient.
+Usage:
+    # Development (with auto-reload):
+    uvicorn envs.atari_env.server.app:app --reload --host 0.0.0.0 --port 8000
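+    # Production (use a single worker: the emulator lives in this process, so
+    # each extra worker would serve its own independent, unsynchronized game):
+    uvicorn envs.atari_env.server.app:app --host 0.0.0.0 --port 8000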
+    # Or run directly:
+    python -m envs.atari_env.server.app
+Environment variables:
+    ATARI_GAME: Game name to serve (default: "pong")
+    ATARI_OBS_TYPE: Observation type (default: "rgb")
+    ATARI_FULL_ACTION_SPACE: Use full action space (default: "false")
+    ATARI_MODE: Game mode (optional)
+    ATARI_DIFFICULTY: Game difficulty (optional)
+    ATARI_REPEAT_ACTION_PROB: Sticky action probability (default: "0.0")
+    ATARI_FRAMESKIP: Frameskip (default: "4")
+"""
+import os
+from core.env_server import create_app
+from ..models import AtariAction, AtariObservation
+from .atari_environment import AtariEnvironment
+# Get configuration from environment variables
+game_name = os.getenv("ATARI_GAME", "pong")
+obs_type = os.getenv("ATARI_OBS_TYPE", "rgb")
+full_action_space = os.getenv("ATARI_FULL_ACTION_SPACE", "false").lower() == "true"
+repeat_action_prob = float(os.getenv("ATARI_REPEAT_ACTION_PROB", "0.0"))
+frameskip = int(os.getenv("ATARI_FRAMESKIP", "4"))
+# Optional parameters
+mode = os.getenv("ATARI_MODE")
+difficulty = os.getenv("ATARI_DIFFICULTY")
+# Convert to int if specified
+mode = int(mode) if mode is not None else None
+difficulty = int(difficulty) if difficulty is not None else None
+# Create the environment instance
+env = AtariEnvironment(
+    game_name=game_name,
+    obs_type=obs_type,
+    full_action_space=full_action_space,
+    mode=mode,
+    difficulty=difficulty,
+    repeat_action_probability=repeat_action_prob,
+    frameskip=frameskip,
+)
+# Create the FastAPI app with routes
+app = create_app(env, AtariAction, AtariObservation)
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

src/envs/atari_env/server/atari_environment.py ADDED Viewed

	@@ -0,0 +1,245 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Atari Environment Server Implementation.
+This module wraps ALE's ALEInterface and exposes it
+via the OpenEnv Environment interface.
+"""
+import uuid
+from typing import Any, Dict, Literal, Optional
+from core.env_server import Action, Environment, Observation
+from ..models import AtariAction, AtariObservation, AtariState
+# Import ALE
+try:
+    from ale_py import ALEInterface, roms
+    import numpy as np
+except ImportError as e:
+    raise ImportError(
+        "ALE (Arcade Learning Environment) is not installed. "
+        "Please install it with: pip install ale-py"
+    ) from e
+class AtariEnvironment(Environment):
+    """
+    Atari Environment wrapper for OpenEnv.
+    This environment wraps Atari 2600 games via the Arcade Learning Environment (ALE)
+    and provides a clean interface for RL training.
+    Supported games include: pong, breakout, space_invaders, and 100+ others.
+    Args:
+        game_name: Name of the Atari game (e.g., "pong", "breakout").
+        obs_type: Observation type - "rgb", "grayscale", or "ram".
+        full_action_space: Use full action space (18 actions) vs minimal.
+        mode: Game mode (if applicable).
+        difficulty: Game difficulty (if applicable).
+        repeat_action_probability: Sticky action probability (default 0.0).
+        frameskip: Number of frames to skip per action (default 4).
+    Example:
+        >>> env = AtariEnvironment("pong")
+        >>> obs = env.reset()
+        >>> print(obs.screen_shape)  # [210, 160, 3]
+        >>> obs = env.step(AtariAction(action_id=2))  # UP
+        >>> print(obs.reward, obs.done)
+    """
+    def __init__(
+        self,
+        game_name: str = "pong",
+        obs_type: Literal["rgb", "grayscale", "ram"] = "rgb",
+        full_action_space: bool = False,
+        mode: Optional[int] = None,
+        difficulty: Optional[int] = None,
+        repeat_action_probability: float = 0.0,
+        frameskip: int = 4,
+    ):
+        """Initialize Atari environment."""
+        super().__init__()
+        self.game_name = game_name
+        self.obs_type = obs_type
+        self.full_action_space = full_action_space
+        self.mode = mode
+        self.difficulty = difficulty
+        self.repeat_action_probability = repeat_action_probability
+        self.frameskip = frameskip
+        # Create ALE interface
+        self.ale = ALEInterface()
+        # Configure ALE
+        from ale_py import LoggerMode
+        self.ale.setLoggerMode(LoggerMode.Error)  # Error mode only
+        self.ale.setFloat("repeat_action_probability", repeat_action_probability)
+        # Load ROM
+        try:
+            rom_path = roms.get_rom_path(game_name)
+            self.ale.loadROM(rom_path)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to load Atari game '{game_name}': {e}\n"
+                f"Available games can be found via: ale_py.roms.list_roms()"
+            ) from e
+        # Set mode and difficulty if specified
+        if mode is not None:
+            self.ale.setMode(mode)
+        if difficulty is not None:
+            self.ale.setDifficulty(difficulty)
+        # Get action set
+        if full_action_space:
+            self._action_set = self.ale.getLegalActionSet()
+        else:
+            self._action_set = self.ale.getMinimalActionSet()
+        # Get screen dimensions for observation space
+        self.screen_height, self.screen_width = self.ale.getScreenDims()
+        if obs_type == "rgb":
+            self.screen_shape = [self.screen_height, self.screen_width, 3]
+        elif obs_type == "grayscale":
+            self.screen_shape = [self.screen_height, self.screen_width]
+        elif obs_type == "ram":
+            self.screen_shape = [self.ale.getRAMSize()]
+        else:
+            raise ValueError(f"Invalid obs_type: {obs_type}")
+        # Initialize state
+        self._state = AtariState(
+            game_name=game_name,
+            obs_type=obs_type,
+            full_action_space=full_action_space,
+            mode=mode,
+            difficulty=difficulty,
+            repeat_action_probability=repeat_action_probability,
+            frameskip=frameskip,
+        )
+    def reset(self) -> Observation:
+        """
+        Reset the environment and return initial observation.
+        Returns:
+            Initial observation for the agent.
+        """
+        # Reset ALE
+        self.ale.reset_game()
+        # Reset state tracking
+        self._state.episode_id = str(uuid.uuid4())
+        self._state.step_count = 0
+        # Get initial observation
+        return self._make_observation()
+    def step(self, action: Action) -> Observation:
+        """
+        Execute agent's action and return resulting observation.
+        Args:
+            action: AtariAction containing the action_id to execute.
+        Returns:
+            Observation after action execution.
+        Raises:
+            ValueError: If action is not an AtariAction.
+        """
+        if not isinstance(action, AtariAction):
+            raise ValueError(f"Expected AtariAction, got {type(action)}")
+        # Validate action_id
+        if action.action_id < 0 or action.action_id >= len(self._action_set):
+            raise ValueError(
+                f"Invalid action_id: {action.action_id}. "
+                f"Valid range: [0, {len(self._action_set) - 1}]"
+            )
+        # Get actual ALE action
+        ale_action = self._action_set[action.action_id]
+        # Execute action with frameskip
+        total_reward = 0.0
+        for _ in range(self.frameskip):
+            total_reward += self.ale.act(ale_action)
+            if self.ale.game_over():
+                break
+        self._state.step_count += 1
+        # Get observation
+        obs = self._make_observation()
+        obs.reward = total_reward
+        return obs
+    @property
+    def state(self) -> AtariState:
+        """Get current environment state."""
+        return self._state
+    def _make_observation(self) -> AtariObservation:
+        """
+        Create an AtariObservation from current ALE state.
+        Returns:
+            AtariObservation for the agent.
+        """
+        # Get screen observation
+        if self.obs_type == "rgb":
+            screen = self.ale.getScreenRGB()
+        elif self.obs_type == "grayscale":
+            screen = self.ale.getScreenGrayscale()
+        elif self.obs_type == "ram":
+            screen = self.ale.getRAM()
+        else:
+            raise ValueError(f"Invalid obs_type: {self.obs_type}")
+        # Flatten screen for JSON serialization
+        # Handle both numpy arrays and lists
+        if hasattr(screen, "flatten"):
+            screen_flat = screen.flatten().tolist()
+        elif hasattr(screen, "tolist"):
+            screen_flat = screen.tolist()
+        else:
+            screen_flat = list(screen)
+        # Get game info
+        lives = self.ale.lives()
+        episode_frame_number = self.ale.getEpisodeFrameNumber()
+        frame_number = self.ale.getFrameNumber()
+        done = self.ale.game_over()
+        # Create legal actions list (indices into action_set)
+        legal_actions = list(range(len(self._action_set)))
+        # Create observation
+        obs = AtariObservation(
+            screen=screen_flat,
+            screen_shape=self.screen_shape,
+            legal_actions=legal_actions,
+            lives=lives,
+            episode_frame_number=episode_frame_number,
+            frame_number=frame_number,
+            done=done,
+            reward=0.0,  # Will be filled in by step()
+            metadata={
+                "game_name": self.game_name,
+                "action_meanings": [str(a) for a in self._action_set],
+            },
+        )
+        return obs

src/envs/atari_env/test_atari_docker.sh ADDED Viewed

	@@ -0,0 +1,333 @@

+#!/bin/bash
+# End-to-end Docker smoke test for the Atari environment.
+# Flow: build image -> start container -> health -> reset -> step -> state -> cleanup
+set -e  # Abort as soon as any command fails
+# ANSI escape sequences used to colorize status output
+NC='\033[0m' # No Color
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+# Test settings
+CONTAINER_NAME="atari-env-test"
+IMAGE_NAME="atari-env"
+IMAGE_TAG="test"
+PORT="8765"  # Host port; deliberately unusual to avoid clashing with other services
+HEALTH_DELAY=2
+HEALTH_RETRIES=30
+# Stop and remove the test container; failures are ignored so that cleanup
+# also succeeds when the container was never created.
+cleanup() {
+    echo -e "\n${BLUE}Cleaning up...${NC}"
+    docker stop "${CONTAINER_NAME}" 2>/dev/null || true
+    docker rm "${CONTAINER_NAME}" 2>/dev/null || true
+    echo -e "${GREEN}✓${NC} Cleanup complete"
+}
+# Run cleanup whenever the script exits, on success or failure
+trap cleanup EXIT
+# Header
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "  ATARI ENVIRONMENT DOCKER TEST"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+# Check prerequisites
+echo -e "${BLUE}Checking prerequisites...${NC}"
+if ! command -v docker &> /dev/null; then
+    echo -e "${RED}✗${NC} Docker is not installed"
+    exit 1
+fi
+echo -e "${GREEN}✓${NC} Docker is installed"
+if ! command -v curl &> /dev/null; then
+    echo -e "${RED}✗${NC} curl is not installed"
+    exit 1
+fi
+echo -e "${GREEN}✓${NC} curl is installed"
+# Check if we're in the right directory
+if [ ! -f "src/envs/atari_env/server/Dockerfile" ]; then
+    echo -e "${RED}✗${NC} Must run from OpenEnv root directory"
+    exit 1
+fi
+echo -e "${GREEN}✓${NC} In correct directory"
+# Step 1: Build Docker image
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 1: Building Docker Image${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo "Building ${IMAGE_NAME}:${IMAGE_TAG}..."
+# A pipeline's exit status is that of its last command (tail), so testing the
+# pipeline directly would report success even when the build failed. Read
+# docker's own status from PIPESTATUS right after the pipeline instead.
+docker build -f src/envs/atari_env/server/Dockerfile -t "${IMAGE_NAME}:${IMAGE_TAG}" . 2>&1 | tee /tmp/atari_build.log | tail -n 20
+BUILD_STATUS=${PIPESTATUS[0]}
+if [ "${BUILD_STATUS}" -eq 0 ]; then
+    echo -e "${GREEN}✓${NC} Docker image built successfully"
+else
+    echo -e "${RED}✗${NC} Docker build failed"
+    echo "See /tmp/atari_build.log for full output"
+    exit 1
+fi
+# Check image exists and report its size (docker reports bytes; convert to MB)
+if docker image inspect "${IMAGE_NAME}:${IMAGE_TAG}" &> /dev/null; then
+    IMAGE_SIZE=$(docker image inspect "${IMAGE_NAME}:${IMAGE_TAG}" --format='{{.Size}}' | awk '{print $1/1024/1024}')
+    echo -e "${GREEN}✓${NC} Image size: ${IMAGE_SIZE} MB"
+else
+    echo -e "${RED}✗${NC} Image not found after build"
+    exit 1
+fi
+# Step 2: Start container
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 2: Starting Container${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+# Clean up any existing container
+docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true
+echo "Starting container on port ${PORT}..."
+# Test the command directly: under `set -e` a separate `$?` check is never
+# reached on failure, so the error message would not be printed.
+if docker run -d \
+    --name "${CONTAINER_NAME}" \
+    -p "${PORT}:8000" \
+    -e ATARI_GAME=pong \
+    -e ATARI_OBS_TYPE=ram \
+    -e ATARI_FRAMESKIP=4 \
+    "${IMAGE_NAME}:${IMAGE_TAG}"; then
+    echo -e "${GREEN}✓${NC} Container started: ${CONTAINER_NAME}"
+else
+    echo -e "${RED}✗${NC} Failed to start container"
+    exit 1
+fi
+# Give the container a moment, then confirm it did not exit immediately.
+# Ask docker about this exact container instead of substring-matching `docker ps`.
+sleep 2
+if [ "$(docker inspect -f '{{.State.Running}}' "${CONTAINER_NAME}" 2>/dev/null)" = "true" ]; then
+    echo -e "${GREEN}✓${NC} Container is running"
+else
+    echo -e "${RED}✗${NC} Container is not running"
+    docker logs "${CONTAINER_NAME}"
+    exit 1
+fi
+# Step 3: Wait for health check
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 3: Waiting for Server${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+# Worst-case wait is retries x delay, not just the retry count
+MAX_WAIT=$((HEALTH_RETRIES * HEALTH_DELAY))
+echo "Waiting for server to be ready (timeout: ${MAX_WAIT}s)..."
+for i in $(seq 1 ${HEALTH_RETRIES}); do
+    # -f makes curl fail on HTTP error statuses, so an error page from a
+    # half-started server is not mistaken for readiness
+    if curl -sf "http://localhost:${PORT}/health" > /dev/null 2>&1; then
+        echo -e "${GREEN}✓${NC} Server is ready (attempt ${i}, ~$(( (i - 1) * HEALTH_DELAY ))s)"
+        break
+    fi
+    if [ "$i" -eq "${HEALTH_RETRIES}" ]; then
+        echo -e "${RED}✗${NC} Server did not become ready in time"
+        echo "Container logs:"
+        docker logs "${CONTAINER_NAME}"
+        exit 1
+    fi
+    echo -n "."
+    sleep ${HEALTH_DELAY}
+done
+# Step 4: Query /health and expect the word "healthy" in the reply
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 4: Testing Health Endpoint${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+HEALTH_RESPONSE=$(curl -s http://localhost:${PORT}/health)
+echo "Response: ${HEALTH_RESPONSE}"
+if ! grep -q "healthy" <<< "${HEALTH_RESPONSE}"; then
+    echo -e "${RED}✗${NC} Health endpoint failed"
+    exit 1
+fi
+echo -e "${GREEN}✓${NC} Health endpoint working"
+# Step 5: POST /reset and check the shape of the reply
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 5: Testing Reset Endpoint${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+RESET_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/reset -H "Content-Type: application/json" -d '{}')
+# An empty body means the request never produced a reply; show recent logs
+if [ -z "${RESET_RESPONSE}" ]; then
+    echo -e "${RED}✗${NC} Reset endpoint returned empty response"
+    docker logs ${CONTAINER_NAME} | tail -20
+    exit 1
+fi
+echo "Response (first 200 chars): ${RESET_RESPONSE:0:200}..."
+# Every one of these names must appear somewhere in the reply
+for field in "observation" "screen" "legal_actions"; do
+    if ! grep -q "${field}" <<< "${RESET_RESPONSE}"; then
+        echo -e "${RED}✗${NC} Reset response missing required fields"
+        echo "Full response: ${RESET_RESPONSE}"
+        exit 1
+    fi
+done
+echo -e "${GREEN}✓${NC} Reset endpoint working"
+# Report how many characters the serialized screen array occupies
+SCREEN_LEN=$(grep -o '"screen":\[[^]]*\]' <<< "${RESET_RESPONSE}" | wc -c)
+echo "  Screen data length: ${SCREEN_LEN} chars"
+# Step 6: POST one action to /step and check the shape of the reply
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 6: Testing Step Endpoint${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+STEP_PAYLOAD='{"action": {"action_id": 0, "game_name": "pong"}}'
+STEP_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/step -H "Content-Type: application/json" -d "${STEP_PAYLOAD}")
+# An empty body means the request never produced a reply; show recent logs
+if [ -z "${STEP_RESPONSE}" ]; then
+    echo -e "${RED}✗${NC} Step endpoint returned empty response"
+    docker logs ${CONTAINER_NAME} | tail -20
+    exit 1
+fi
+echo "Response (first 200 chars): ${STEP_RESPONSE:0:200}..."
+# Every one of these names must appear somewhere in the reply
+for field in "observation" "reward" "done"; do
+    if ! grep -q "${field}" <<< "${STEP_RESPONSE}"; then
+        echo -e "${RED}✗${NC} Step response missing required fields"
+        echo "Full response: ${STEP_RESPONSE}"
+        exit 1
+    fi
+done
+echo -e "${GREEN}✓${NC} Step endpoint working"
+# Pull the raw reward / done values out of the JSON text
+REWARD=$(grep -o '"reward":[^,}]*' <<< "${STEP_RESPONSE}" | cut -d: -f2)
+DONE=$(grep -o '"done":[^,}]*' <<< "${STEP_RESPONSE}" | cut -d: -f2)
+echo "  Reward: ${REWARD}"
+echo "  Done: ${DONE}"
+# Step 7: GET /state and check the shape of the reply
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 7: Testing State Endpoint${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+STATE_RESPONSE=$(curl -s http://localhost:${PORT}/state)
+# An empty body means the request never produced a reply; show recent logs
+if [ -z "${STATE_RESPONSE}" ]; then
+    echo -e "${RED}✗${NC} State endpoint returned empty response"
+    docker logs ${CONTAINER_NAME} | tail -20
+    exit 1
+fi
+echo "Response: ${STATE_RESPONSE}"
+# Every one of these names must appear somewhere in the reply
+for field in "episode_id" "step_count" "game_name"; do
+    if ! grep -q "${field}" <<< "${STATE_RESPONSE}"; then
+        echo -e "${RED}✗${NC} State response missing required fields"
+        echo "Full response: ${STATE_RESPONSE}"
+        exit 1
+    fi
+done
+echo -e "${GREEN}✓${NC} State endpoint working"
+# Pull the game name and step counter out of the JSON text
+GAME_NAME=$(grep -o '"game_name":"[^"]*"' <<< "${STATE_RESPONSE}" | cut -d'"' -f4)
+STEP_COUNT=$(grep -o '"step_count":[^,}]*' <<< "${STATE_RESPONSE}" | cut -d: -f2)
+echo "  Game: ${GAME_NAME}"
+echo "  Steps: ${STEP_COUNT}"
+# Step 8: Test multiple steps
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 8: Testing Multiple Steps${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo "Taking 10 steps..."
+TOTAL_REWARD=0
+for i in {1..10}; do
+    ACTION_ID=$((RANDOM % 3))  # Random action 0-2
+    STEP_PAYLOAD="{\"action\": {\"action_id\": ${ACTION_ID}, \"game_name\": \"pong\"}}"
+    STEP_RESPONSE=$(curl -s -X POST http://localhost:${PORT}/step -H "Content-Type: application/json" -d "${STEP_PAYLOAD}")
+    if ! echo "${STEP_RESPONSE}" | grep -q "observation"; then
+        echo -e "${RED}✗${NC} Step ${i} failed"
+        exit 1
+    fi
+    # Keep only the first match: if a key appears more than once in the reply,
+    # a multi-line value would never compare equal to "true" below.
+    REWARD=$(echo "${STEP_RESPONSE}" | grep -o '"reward":[^,}]*' | head -n 1 | cut -d: -f2 | sed 's/null/0/')
+    DONE=$(echo "${STEP_RESPONSE}" | grep -o '"done":[^,}]*' | head -n 1 | cut -d: -f2 | tr -d '[:space:]')
+    # Rewards may be fractional, which bash arithmetic cannot add; use awk
+    TOTAL_REWARD=$(awk -v total="${TOTAL_REWARD}" -v reward="${REWARD}" 'BEGIN { print total + reward }')
+    echo "  Step ${i}: action=${ACTION_ID}, reward=${REWARD}, done=${DONE}"
+    if [ "${DONE}" = "true" ]; then
+        echo "  Episode completed early at step ${i}"
+        break
+    fi
+done
+echo "  Total reward: ${TOTAL_REWARD}"
+echo -e "${GREEN}✓${NC} Multiple steps completed successfully"
+# Step 9: Check container logs for errors
+echo ""
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE}STEP 9: Checking Container Logs${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+LOGS=$(docker logs ${CONTAINER_NAME} 2>&1)
+if echo "${LOGS}" | grep -i "error" | grep -v "LoggerMode.Error"; then
+    echo -e "${YELLOW}⚠${NC}  Found errors in logs:"
+    echo "${LOGS}" | grep -i "error" | head -5
+else
+    echo -e "${GREEN}✓${NC} No errors in container logs"
+fi
+if echo "${LOGS}" | grep -i "exception"; then
+    echo -e "${RED}✗${NC} Found exceptions in logs:"
+    echo "${LOGS}" | grep -i "exception" | head -5
+    exit 1
+else
+    echo -e "${GREEN}✓${NC} No exceptions in container logs"
+fi
+# Final Summary
+echo ""
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo -e "${GREEN}✅ ALL DOCKER TESTS PASSED${NC}"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+echo "Summary:"
+echo "  ✓ Docker image built successfully"
+echo "  ✓ Container started and ran"
+echo "  ✓ Health endpoint working"
+echo "  ✓ Reset endpoint working"
+echo "  ✓ Step endpoint working"
+echo "  ✓ State endpoint working"
+echo "  ✓ Multiple steps working"
+echo "  ✓ No exceptions in container logs"
+echo ""
+echo "Image: ${IMAGE_NAME}:${IMAGE_TAG}"
+echo "Container: ${CONTAINER_NAME}"
+echo "Port: ${PORT}"
+echo ""
+# The EXIT trap stops and removes the test container, so `docker start` and
+# `docker logs` on it would fail; point at a fresh `docker run` instead.
+echo "Note: the test container is stopped and removed when this script exits."
+echo "To run the image manually: docker run -d --name ${CONTAINER_NAME} -p ${PORT}:8000 ${IMAGE_NAME}:${IMAGE_TAG}"
+echo "To view its logs afterwards: docker logs ${CONTAINER_NAME}"
+echo ""