navkast
committed on
Create openrouter client (#7)
Browse files* feat: Implement AsyncOpenRouterService for OpenAI API integration
* fix: Add type annotations and ignore untyped function calls in openrouter.py
* Create openrouter client
src/vsp/llm/openrouter/openrouter.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import time
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
from typing import AsyncIterator
|
| 5 |
+
|
| 6 |
+
import openai
|
| 7 |
+
from openai import AsyncOpenAI
|
| 8 |
+
from openai.types.chat import ChatCompletion
|
| 9 |
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
|
| 10 |
+
|
| 11 |
+
from vsp.llm.llm_service import LLMService, RateLimitError
|
| 12 |
+
from vsp.shared import aws_clients, config, logger_factory
|
| 13 |
+
|
| 14 |
+
logger = logger_factory.get_logger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class AsyncOpenRouterService(LLMService):
    """
    An asynchronous service class for making calls to the OpenRouter API.

    This class handles authentication (API key fetched from AWS Parameter Store),
    limits the number of concurrent in-flight calls with a semaphore, and applies
    a simple sliding-window rate limit of ``requests_per_minute`` calls per minute.
    """

    def __init__(self, model: str, max_concurrency: int = 3, requests_per_minute: int = 60):
        """
        Initialize the AsyncOpenRouterService.

        Args:
            model (str): The OpenRouter model to use for API calls.
            max_concurrency (int): Maximum number of concurrent API calls. Defaults to 3.
            requests_per_minute (int): Maximum number of requests allowed per minute. Defaults to 60.
        """
        self._client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1", api_key=self._fetch_api_key())
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._model = model
        self._requests_per_minute = requests_per_minute
        # time.time() timestamps of requests made within the last minute.
        self._request_times: list[float] = []

    @staticmethod
    def _fetch_api_key() -> str:
        """
        Fetch the OpenRouter API key from AWS Parameter Store.

        Returns:
            str: The OpenRouter API key.

        Raises:
            ValueError: If the API key is not found in the Parameter Store.
            RuntimeError: If there's an error accessing the Parameter Store.
        """
        try:
            return aws_clients.fetch_from_parameter_store(config.get_openrouter_api_key_path(), is_secret=True)
        except aws_clients.ParameterNotFoundError as e:
            logger.error("OpenRouter API key not found in Parameter Store", error=str(e))
            raise ValueError("OpenRouter API key not found") from e
        except aws_clients.ParameterStoreAccessError as e:
            logger.error("Error accessing Parameter Store", error=str(e))
            raise RuntimeError("Unable to access OpenRouter API key") from e

    @asynccontextmanager
    async def __call__(self) -> AsyncIterator["AsyncOpenRouterService"]:
        """Async context manager yielding this service; closes the HTTP client on exit."""
        try:
            yield self
        finally:
            await self._client.close()

    async def invoke(
        self,
        user_prompt: str | None = None,
        system_prompt: str | None = None,
        partial_assistant_prompt: str | None = None,
        max_tokens: int = 1000,
        temperature: float = 0.0,
    ) -> str | None:
        """
        Invoke the OpenRouter API with the given prompts and parameters.

        This method handles rate limiting and makes the API call.

        Args:
            user_prompt (str | None): The main prompt from the user.
            system_prompt (str | None): A system message to set the context.
            partial_assistant_prompt (str | None): A partial response from the assistant;
                it is prepended to the returned text.
            max_tokens (int): Maximum number of tokens in the response.
            temperature (float): Sampling temperature for response generation.

        Returns:
            str | None: The generated response from the OpenRouter API, or None if no response.

        Raises:
            RateLimitError: If the API rate limit is exceeded (after retries).
            openai.APIError: For any other errors encountered during the API call.
        """
        async with self._semaphore:  # Use semaphore to limit concurrency
            await self._wait_for_rate_limit()

            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            if user_prompt:
                messages.append({"role": "user", "content": user_prompt})
            if partial_assistant_prompt:
                messages.append({"role": "assistant", "content": partial_assistant_prompt})

            response = await self.query_openrouter(max_tokens, temperature, messages)
            logger.info("OpenRouter API called", model=self._model)

            self._update_request_times()

            # BUG FIX: previously `text = str(message.content)` ran before the None
            # check, so a missing content became the literal string "None" and the
            # None branch was unreachable. Check the raw content first.
            content = response.choices[0].message.content
            if content is None:
                logger.warning("No message content from OpenRouter API")
                return None
            text = str(content)

            if partial_assistant_prompt:
                text = f"{partial_assistant_prompt}{text}"

            # Extract token usage information (usage may be absent on some responses)
            usage = response.usage
            input_tokens = usage.prompt_tokens if usage else 0
            output_tokens = usage.completion_tokens if usage else 0

            # Log token usage
            logger.info("Token usage", input_tokens=input_tokens, output_tokens=output_tokens)

            return text

    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_exception_type(RateLimitError),
    )  # type: ignore
    async def query_openrouter(
        self, max_tokens: int, temperature: float, messages: list[dict[str, str]]
    ) -> ChatCompletion:
        """
        Make a single chat-completion call, retrying with exponential backoff
        when OpenRouter reports a rate limit.

        Args:
            max_tokens (int): Maximum number of tokens in the response.
            temperature (float): Sampling temperature for response generation.
            messages (list[dict[str, str]]): Chat messages in OpenAI role/content format.

        Returns:
            ChatCompletion: The raw chat-completion response.

        Raises:
            RateLimitError: When OpenRouter rate-limits the call (triggers the retry).
            openai.APIError: For any other API failure.
        """
        try:
            response = await self._client.chat.completions.create(
                model=self._model,
                messages=messages,  # type: ignore[arg-type]
                max_tokens=max_tokens,
                temperature=temperature,
            )

            return response

        except openai.RateLimitError as e:
            logger.warning("Rate limit error encountered. Retrying...")
            raise RateLimitError("OpenRouter API rate limit exceeded") from e
        except openai.APIError as e:
            logger.error("OpenRouter API error", error=str(e))
            raise

    def _update_request_times(self) -> None:
        """
        Update the list of request times, removing any that are older than one minute.
        """
        current_time = time.time()
        self._request_times = [t for t in self._request_times if current_time - t < 60]
        self._request_times.append(current_time)

    async def _wait_for_rate_limit(self) -> None:
        """
        Wait if necessary to respect the rate limit.

        Sleeps until the oldest tracked request falls outside the 60-second
        window, then prunes stale timestamps and re-checks.
        """
        while len(self._request_times) >= self._requests_per_minute:
            current_time = time.time()
            oldest_request_time = self._request_times[0]
            if current_time - oldest_request_time < 60:
                wait_time = 60 - (current_time - oldest_request_time)
                logger.info(f"Rate limit reached. Waiting for {wait_time:.2f} seconds.")
                await asyncio.sleep(wait_time)
            self._request_times = [t for t in self._request_times if current_time - t < 60]
        logger.debug(f"Requests in the last minute: {len(self._request_times)}")
src/vsp/shared/config.py
CHANGED
|
@@ -93,6 +93,16 @@ def get_openai_api_key_path() -> str:
|
|
| 93 |
return str(config["openai"]["openai_api_key_parameter_store_path"])
|
| 94 |
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
@cache
|
| 97 |
def get_linkedin_key_path() -> str:
|
| 98 |
"""
|
|
|
|
| 93 |
return str(config["openai"]["openai_api_key_parameter_store_path"])
|
| 94 |
|
| 95 |
|
| 96 |
+
@cache
def get_openrouter_api_key_path() -> str:
    """
    Return the Parameter Store path of the OpenRouter API key.

    The path is read from the TOML configuration file; the key value itself
    lives in AWS Parameter Store.
    """
    cfg = _get_config()
    openrouter_section = cfg["openrouter"]
    return str(openrouter_section["openrouter_api_key_parameter_store_path"])
|
| 104 |
+
|
| 105 |
+
|
| 106 |
@cache
|
| 107 |
def get_linkedin_key_path() -> str:
|
| 108 |
"""
|
src/vsp/shared/config.toml
CHANGED
|
@@ -8,5 +8,8 @@ bedrock_aws_account = "339713101814"
|
|
| 8 |
[openai]
|
| 9 |
openai_api_key_parameter_store_path = "/secrets/openai/api_key"
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
[linkedin]
|
| 12 |
linkedin_api_key_parameter_store_path = "/secrets/rapidapi/linkedin"
|
|
|
|
| 8 |
[openai]
|
| 9 |
openai_api_key_parameter_store_path = "/secrets/openai/api_key"
|
| 10 |
|
| 11 |
+
[openrouter]
|
| 12 |
+
openrouter_api_key_parameter_store_path = "/secrets/openrouter/api_key"
|
| 13 |
+
|
| 14 |
[linkedin]
|
| 15 |
linkedin_api_key_parameter_store_path = "/secrets/rapidapi/linkedin"
|
tests/vsp/llm/{__ini__.py → __init__.py}
RENAMED
|
File without changes
|
tests/vsp/llm/openrouter/__init__.py
ADDED
|
File without changes
|
tests/vsp/llm/openrouter/test_integration_openrouter.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from vsp.llm.openrouter.openrouter import AsyncOpenRouterService
|
| 6 |
+
from vsp.shared import logger_factory
|
| 7 |
+
|
| 8 |
+
logger = logger_factory.get_logger(__name__)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.mark.asyncio
async def test_openrouter_integration():
    """
    Integration test for AsyncOpenRouterService.

    This test makes an actual API call to OpenRouter using the
    nousresearch/hermes-3-llama-3.1-405b:free model. It requires a valid
    OpenRouter API key to be set in the AWS Parameter Store.

    Note: This test should be run sparingly to avoid unnecessary API calls
    and potential costs.
    """
    # Keep the model here in sync with the docstring above; the docstring
    # previously named a different (perplexity) model than the code used.
    model = "nousresearch/hermes-3-llama-3.1-405b:free"
    service = AsyncOpenRouterService(model)

    async with service() as openrouter:
        try:
            response = await openrouter.invoke(
                user_prompt="What is the capital of France?", max_tokens=100, temperature=0.7
            )

            # Log the response
            logger.info("OpenRouter API Response", response=response)

            # Assertions to verify the response
            assert response is not None
            assert isinstance(response, str)
            assert len(response) > 0
            assert "Paris" in response

            logger.info("Integration test passed successfully")
        except Exception as e:
            logger.error("Integration test failed", error=str(e))
            raise


if __name__ == "__main__":
    asyncio.run(test_openrouter_integration())
|