rkihacker committed on
Commit 62c810f · verified · 1 Parent(s): f3a7dbb

Update config.yaml

Files changed (1)
  1. config.yaml +47 -204
config.yaml CHANGED
@@ -1,223 +1,66 @@
- model_list:
-   # --------------gemini-models--------------------
-
-   - model_name: gg1/gemini-2.5-flash
-     litellm_params:
-       model: gemini/gemini-2.5-flash
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-2.5-pro
-     litellm_params:
-       model: gemini/gemini-2.5-pro
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-embedding-001
-     litellm_params:
-       model: gemini/gemini-embedding-001
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-2.5-flash-lite
-     litellm_params:
-       model: gemini/gemini-2.5-flash-lite
-       api_key: os.environ/GEMINI_API_KEY1
-
-   # --------------vercel ai gateway-----------------
-
-   - model_name: vercel1/gpt-5
-     litellm_params:
-       model: openai/openai/gpt-5
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-
-
-   # --------------openrouter(free)--------------------
-
-   - model_name: or1/gpt-oss-20b
-     litellm_params:
-       model: openrouter/openai/gpt-oss-20b:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/deepseek-chat-v3.1:free
-     litellm_params:
-       model: openrouter/deepseek/deepseek-chat-v3.1:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/kimi-k2
-     litellm_params:
-       model: openrouter/moonshotai/kimi-k2:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/dolphin-mistral-24b-venice-edition
-     litellm_params:
-       model: openrouter/cognitivecomputations/dolphin-mistral-24b-venice-edition:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/deepseek-r1-0528
-     litellm_params:
-       model: openrouter/deepseek/deepseek-r1-0528:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-
-
-   # --------------groq----------------------

-   - model_name: gq1/llama-3.3-70b-versatile
-     litellm_params:
-       model: groq/llama-3.3-70b-versatile
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/kimi-k2-instruct-0905
-     litellm_params:
-       model: groq/moonshotai/kimi-k2-instruct-0905
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/qwen/qwen3-32b
-     litellm_params:
-       model: groq/qwen/qwen3-32b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/gpt-oss-120b
-     litellm_params:
-       model: groq/openai/gpt-oss-120b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/deepseek-r1-distill-llama-70b
-     litellm_params:
-       model: groq/deepseek-r1-distill-llama-70b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/compound
-     litellm_params:
-       model: groq/groq/compound
-       api_key: os.environ/GROQ_API_KEY1
-
-   # --------------cerebras----------------------
-
-   - model_name: cb/qwen-3-235b-a22b-instruct-2507
-     litellm_params:
-       model: cerebras/qwen-3-235b-a22b-instruct-2507
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   - model_name: cb/qwen-3-235b-a22b-thinking-2507
-     litellm_params:
-       model: cerebras/qwen-3-235b-a22b-thinking-2507
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   - model_name: cb/gpt-oss-120b
-     litellm_params:
-       model: cerebras/gpt-oss-120b
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   # --------------openai--------------------
-
-   - model_name: op1/gpt-5-mini
-     litellm_params:
-       model: openai/gpt-5-mini
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/gpt-4.1-mini
-     litellm_params:
-       model: openai/gpt-4.1-mini
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/text-embedding-3-large
-     litellm_params:
-       model: openai/text-embedding-3-large
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/text-embedding-3-small
-     litellm_params:
-       model: openai/text-embedding-3-small
-       api_key: os.environ/OPENAI_API_KEY1
-
-   # --------------cohere--------------------
-
-   - model_name: ch/command-r-plus
-     litellm_params:
-       model: cohere/command-r-plus-08-2024
-       api_key: os.environ/COHERE_API_KEY
-
-   - model_name: ch/command-a
-     litellm_params:
-       model: cohere/command-a-03-2025
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-english
      litellm_params:
-       model: cohere/embed-english-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-english-light
      litellm_params:
-       model: cohere/embed-english-light-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-multilingual
      litellm_params:
-       model: cohere/embed-multilingual-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-multilingual-light
      litellm_params:
-       model: cohere/embed-multilingual-light-v3.0
-       api_key: os.environ/COHERE_API_KEY
-
-   - model_name: ch/rerank-english
      litellm_params:
-       model: cohere/rerank-english-v3.0
-       api_key: os.environ/COHERE_API_KEY

- # --------------Other Settings--------------------

  litellm_settings:
    # Networking settings
-   request_timeout: 20 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
    num_retries: 3
-   fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
-   allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
-   cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails
    drop_params: true

- general_settings:
-   master_key: os.environ/MASTER_KEY # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)

  router_settings:
-   fallbacks:
-     [
-       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
-       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
-     ]
-   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
-   routing_strategy: simple-shuffle
+ # config.yaml for LiteLLM - Replicate Models Only
+
+ model_list:
+   # -------------- Replicate Models --------------------
+   # Add all your Replicate models here.
+   # The 'model' format is 'replicate/<owner>/<model-name>:<version-id>'
+   # You can find the full model path on replicate.com

+   # Your example model:
+   - model_name: claude-4.5-sonnet
      litellm_params:
+       model: replicate/anthropic/claude-4.5-sonnet # Note: Check if this specific model path is available on Replicate
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Llama 3 70B Instruct
+   - model_name: rep/llama-3-70b
      litellm_params:
+       model: replicate/meta/meta-llama-3-70b-instruct
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Mixtral 8x7B Instruct
+   - model_name: rep/mixtral-8x7b
      litellm_params:
+       model: replicate/mistralai/mixtral-8x7b-instruct-v0.1
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Claude 3 Haiku
+   - model_name: rep/claude-3-haiku
      litellm_params:
+       model: replicate/anthropic/claude-3-haiku-20240307
+       api_key: os.environ/REPLICATE_API_KEY
+
+   # Example: Stable Diffusion XL (Image Model)
+   - model_name: rep/sdxl
      litellm_params:
+       model: replicate/stability-ai/sdxl
+       api_key: os.environ/REPLICATE_API_KEY

+ # -------------- LiteLLM Settings --------------------

  litellm_settings:
    # Networking settings
+   # Replicate models can have cold starts, so a high timeout is recommended.
+   request_timeout: 600 # (int) llm request timeout in seconds (e.g., 10 minutes)
    num_retries: 3
+   allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
+   cooldown_time: 30 # how long to cooldown model
    drop_params: true
+   # You could add fallbacks between replicate models if desired, e.g.:
+   # fallbacks: [{ "rep/llama-3-70b": ["rep/mixtral-8x7b"] }]
+
+ # -------------- Router Settings --------------------
+
  router_settings:
+   # Provider-specific fallbacks and aliases from your original config have been removed.
+   routing_strategy: simple-shuffle # Keeps routing strategy from your original config
+   # You can add Replicate-specific fallbacks here if needed:
+   # fallbacks:
+   #   [
+   #     { "rep/llama-3-70b": ["rep/mixtral-8x7b"] },
+   #   ]
+
+ # -------------- General Settings --------------------
+
+ general_settings:
+   master_key: os.environ/MASTER_KEY # [RECOMMENDED] Secure your proxy with a master key
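
After this change the proxy only serves the Replicate-backed aliases declared in model_list, and every request has to present the master key. The following is a minimal client sketch against the proxy started from the new config; the base URL (LiteLLM's default local port 4000), the placeholder key value sk-my-master-key, and the launch command shown in the comments are assumptions for illustration, not part of this commit.

# Hypothetical client for the proxy defined by the new config.yaml.
# Assumes the proxy was started with `litellm --config config.yaml` after
# exporting REPLICATE_API_KEY and MASTER_KEY=sk-my-master-key.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy exposes an OpenAI-compatible API
    api_key="sk-my-master-key",        # must match the MASTER_KEY the proxy was started with
)

# "rep/llama-3-70b" is one of the model_name aliases declared in model_list above.
response = client.chat.completions.create(
    model="rep/llama-3-70b",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    timeout=600,  # mirrors the generous request_timeout chosen for Replicate cold starts
)
print(response.choices[0].message.content)

Because the OpenAI-compatible endpoint is unchanged, the same client code would have worked against the removed Gemini, Groq, OpenRouter, Cerebras, OpenAI, and Cohere aliases; only the model names differ.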