# litellmprv
#
# Paused
#
# File size: 2,372 Bytes

# config.yaml for LiteLLM - Replicate Models Only

# Deployments exposed by the proxy. Each entry maps a client-facing
# `model_name` to the Replicate model that LiteLLM should call.
model_list:
  # -------------- Replicate Models --------------------
  # Add all your Replicate models here.
  # The 'model' format is 'replicate/<owner>/<model-name>', optionally
  # followed by ':<version-id>' (none of the entries below pin a version).
  # You can find the full model path on replicate.com.
  # Every entry reads its API key from the REPLICATE_API_KEY environment
  # variable through the `os.environ/<NAME>` reference.

  # Claude 4.5 Sonnet
  # NOTE(review): check that this specific model path is available on
  # Replicate.
  # NOTE(review): unlike the other entries this model_name has no `rep/`
  # prefix; it is left unchanged because clients request models by this name.
  - model_name: claude-4.5-sonnet
    litellm_params:
      model: replicate/anthropic/claude-4.5-sonnet
      api_key: os.environ/REPLICATE_API_KEY

  # Llama 3 70B Instruct
  - model_name: rep/llama-3-70b
    litellm_params:
      model: replicate/meta/meta-llama-3-70b-instruct
      api_key: os.environ/REPLICATE_API_KEY

  # Mixtral 8x7B Instruct
  - model_name: rep/mixtral-8x7b
    litellm_params:
      model: replicate/mistralai/mixtral-8x7b-instruct-v0.1
      api_key: os.environ/REPLICATE_API_KEY

  # Claude 3 Haiku
  # NOTE(review): confirm this model path exists on Replicate.
  - model_name: rep/claude-3-haiku
    litellm_params:
      model: replicate/anthropic/claude-3-haiku-20240307
      api_key: os.environ/REPLICATE_API_KEY

  # Stable Diffusion XL (image model)
  # NOTE(review): this is not a chat model; confirm clients call it through
  # an image-generation route rather than chat completions.
  - model_name: rep/sdxl
    litellm_params:
      model: replicate/stability-ai/sdxl
      api_key: os.environ/REPLICATE_API_KEY

# -------------- LiteLLM Settings --------------------

litellm_settings:
  # --- Request handling ---
  # Enables LiteLLM's drop_params option.
  # NOTE(review): presumably drops request parameters the target provider
  # does not support instead of failing — confirm against the LiteLLM docs.
  drop_params: true

  # --- Networking / reliability ---
  # Per-request timeout in seconds (600 s = 10 minutes). Kept high because
  # Replicate models can have cold starts.
  request_timeout: 600
  # Number of retries for a failed call.
  num_retries: 3
  # A model is put on cooldown once it fails more than this many calls
  # within a minute.
  allowed_fails: 3
  # How long a model stays on cooldown (presumably seconds — confirm).
  cooldown_time: 30

  # Optional: fallbacks between Replicate models, for example:
  # fallbacks:
  #   - rep/llama-3-70b:
  #       - rep/mixtral-8x7b

# -------------- Router Settings --------------------

router_settings:
  # Routing strategy carried over unchanged from the previous config.
  # The provider-specific fallbacks and aliases that config contained are
  # intentionally not present here.
  routing_strategy: simple-shuffle

  # Optional: Replicate-specific fallbacks can be added here, for example:
  # fallbacks:
  #   - rep/llama-3-70b:
  #       - rep/mixtral-8x7b

# -------------- General Settings --------------------

general_settings:
  # [RECOMMENDED] Secure the proxy with a master key. The value is read
  # from the MASTER_KEY environment variable, not stored in this file.
  # NOTE(review): LiteLLM's docs ask for master keys that start with
  # `sk-` — confirm the MASTER_KEY value follows that.
  master_key: os.environ/MASTER_KEY