rkihacker committed on
Commit 62c810f · verified · 1 Parent(s): f3a7dbb

Update config.yaml

Files changed (1)
  1. config.yaml +47 -204
config.yaml CHANGED
@@ -1,223 +1,66 @@
- model_list:
-   # --------------gemini-models--------------------
-
-   - model_name: gg1/gemini-2.5-flash
-     litellm_params:
-       model: gemini/gemini-2.5-flash
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-2.5-pro
-     litellm_params:
-       model: gemini/gemini-2.5-pro
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-embedding-001
-     litellm_params:
-       model: gemini/gemini-embedding-001
-       api_key: os.environ/GEMINI_API_KEY1
-
-   - model_name: gg1/gemini-2.5-flash-lite
-     litellm_params:
-       model: gemini/gemini-2.5-flash-lite
-       api_key: os.environ/GEMINI_API_KEY1
-
-   # --------------vercel ai gateway-----------------
-
-   - model_name: vercel1/gpt-5
-     litellm_params:
-       model: openai/openai/gpt-5
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-   - model_name: vercel1/claude-4-sonnet
-     litellm_params:
-       model: openai/anthropic/claude-4-sonnet
-       api_base: os.environ/VERCELAI_BASE_URL
-       api_key: os.environ/VERCELAI_API_KEY1
-
-
-
-   # --------------openrouter(free)--------------------
-
-   - model_name: or1/gpt-oss-20b
-     litellm_params:
-       model: openrouter/openai/gpt-oss-20b:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/deepseek-chat-v3.1:free
-     litellm_params:
-       model: openrouter/deepseek/deepseek-chat-v3.1:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/kimi-k2
-     litellm_params:
-       model: openrouter/moonshotai/kimi-k2:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/dolphin-mistral-24b-venice-edition
-     litellm_params:
-       model: openrouter/cognitivecomputations/dolphin-mistral-24b-venice-edition:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-   - model_name: or1/deepseek-r1-0528
-     litellm_params:
-       model: openrouter/deepseek/deepseek-r1-0528:free
-       api_key: os.environ/OPENROUTER_API_KEY1
-
-
-
-   # --------------groq----------------------

-   - model_name: gq1/llama-3.3-70b-versatile
-     litellm_params:
-       model: groq/llama-3.3-70b-versatile
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/kimi-k2-instruct-0905
-     litellm_params:
-       model: groq/moonshotai/kimi-k2-instruct-0905
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/qwen/qwen3-32b
-     litellm_params:
-       model: groq/qwen/qwen3-32b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/gpt-oss-120b
-     litellm_params:
-       model: groq/openai/gpt-oss-120b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/deepseek-r1-distill-llama-70b
-     litellm_params:
-       model: groq/deepseek-r1-distill-llama-70b
-       api_key: os.environ/GROQ_API_KEY1
-
-   - model_name: gq1/compound
-     litellm_params:
-       model: groq/groq/compound
-       api_key: os.environ/GROQ_API_KEY1
-
-   # --------------cerebras----------------------
-
-   - model_name: cb/qwen-3-235b-a22b-instruct-2507
-     litellm_params:
-       model: cerebras/qwen-3-235b-a22b-instruct-2507
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   - model_name: cb/qwen-3-235b-a22b-thinking-2507
-     litellm_params:
-       model: cerebras/qwen-3-235b-a22b-thinking-2507
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   - model_name: cb/gpt-oss-120b
-     litellm_params:
-       model: cerebras/gpt-oss-120b
-       api_key: os.environ/CEREBRAS_API_KEY
-
-   # --------------openai--------------------
-
-   - model_name: op1/gpt-5-mini
-     litellm_params:
-       model: openai/gpt-5-mini
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/gpt-4.1-mini
-     litellm_params:
-       model: openai/gpt-4.1-mini
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/text-embedding-3-large
-     litellm_params:
-       model: openai/text-embedding-3-large
-       api_key: os.environ/OPENAI_API_KEY1
-
-   - model_name: op1/text-embedding-3-small
-     litellm_params:
-       model: openai/text-embedding-3-small
-       api_key: os.environ/OPENAI_API_KEY1
-
-   # --------------cohere--------------------
-
-   - model_name: ch/command-r-plus
-     litellm_params:
-       model: cohere/command-r-plus-08-2024
-       api_key: os.environ/COHERE_API_KEY
-
-   - model_name: ch/command-a
-     litellm_params:
-       model: cohere/command-a-03-2025
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-english
      litellm_params:
-       model: cohere/embed-english-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-english-light
      litellm_params:
-       model: cohere/embed-english-light-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-multilingual
      litellm_params:
-       model: cohere/embed-multilingual-v3.0
-       api_key: os.environ/COHERE_API_KEY

-   - model_name: ch/embed-multilingual-light
      litellm_params:
-       model: cohere/embed-multilingual-light-v3.0
-       api_key: os.environ/COHERE_API_KEY
-
-   - model_name: ch/rerank-english
      litellm_params:
-       model: cohere/rerank-english-v3.0
-       api_key: os.environ/COHERE_API_KEY

- # --------------Other Settings--------------------

  litellm_settings:
    # Networking settings
-   request_timeout: 20 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
    num_retries: 3
-   fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
-   allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
-   cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails
    drop_params: true

- general_settings:
-   master_key: os.environ/MASTER_KEY # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)

  router_settings:
-   fallbacks:
-     [
-       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
-       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
-     ]
-   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
-   routing_strategy: simple-shuffle
+ # config.yaml for LiteLLM - Replicate Models Only
+
+ model_list:
+   # -------------- Replicate Models --------------------
+   # Add all your Replicate models here.
+   # The 'model' format is 'replicate/<owner>/<model-name>:<version-id>'
+   # You can find the full model path on replicate.com

+   # Your example model:
+   - model_name: claude-4.5-sonnet
      litellm_params:
+       model: replicate/anthropic/claude-4.5-sonnet # Note: Check if this specific model path is available on Replicate
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Llama 3 70B Instruct
+   - model_name: rep/llama-3-70b
      litellm_params:
+       model: replicate/meta/meta-llama-3-70b-instruct
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Mixtral 8x7B Instruct
+   - model_name: rep/mixtral-8x7b
      litellm_params:
+       model: replicate/mistralai/mixtral-8x7b-instruct-v0.1
+       api_key: os.environ/REPLICATE_API_KEY

+   # Example: Claude 3 Haiku
+   - model_name: rep/claude-3-haiku
      litellm_params:
+       model: replicate/anthropic/claude-3-haiku-20240307
+       api_key: os.environ/REPLICATE_API_KEY
+
+   # Example: Stable Diffusion XL (Image Model)
+   - model_name: rep/sdxl
      litellm_params:
+       model: replicate/stability-ai/sdxl
+       api_key: os.environ/REPLICATE_API_KEY

+ # -------------- LiteLLM Settings --------------------

  litellm_settings:
    # Networking settings
+   # Replicate models can have cold starts, so a high timeout is recommended.
+   request_timeout: 600 # (int) llm request timeout in seconds (e.g., 10 minutes)
    num_retries: 3
+   allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
+   cooldown_time: 30 # how long to cooldown model
    drop_params: true
+   # You could add fallbacks between replicate models if desired, e.g.:
+   # fallbacks: [{ "rep/llama-3-70b": ["rep/mixtral-8x7b"] }]
+
+ # -------------- Router Settings --------------------
+
  router_settings:
+   # Provider-specific fallbacks and aliases from your original config have been removed.
+   routing_strategy: simple-shuffle # Keeps routing strategy from your original config
+   # You can add Replicate-specific fallbacks here if needed:
+   # fallbacks:
+   #   [
+   #     { "rep/llama-3-70b": ["rep/mixtral-8x7b"] },
+   #   ]
+
+ # -------------- General Settings --------------------
+
+ general_settings:
+   master_key: os.environ/MASTER_KEY # [RECOMMENDED] Secure your proxy with a master key
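
After this change the proxy only serves the Replicate-backed aliases declared in model_list, and every request has to present the master key. The following is a minimal client sketch against the proxy started from the new config; the base URL (LiteLLM's default local port 4000), the placeholder key value sk-my-master-key, and the launch command shown in the comments are assumptions for illustration, not part of this commit.

# Hypothetical client for the proxy defined by the new config.yaml.
# Assumes the proxy was started with `litellm --config config.yaml` after
# exporting REPLICATE_API_KEY and MASTER_KEY=sk-my-master-key.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy exposes an OpenAI-compatible API
    api_key="sk-my-master-key",        # must match the MASTER_KEY the proxy was started with
)

# "rep/llama-3-70b" is one of the model_name aliases declared in model_list above.
response = client.chat.completions.create(
    model="rep/llama-3-70b",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    timeout=600,  # mirrors the generous request_timeout chosen for Replicate cold starts
)
print(response.choices[0].message.content)

Because the OpenAI-compatible endpoint is unchanged, the same client code would have worked against the removed Gemini, Groq, OpenRouter, Cerebras, OpenAI, and Cohere aliases; only the model names differ.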