File size: 2,372 Bytes
62c810f
7e9c324
62c810f
 
 
 
 
88510a3
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
7195864
62c810f
 
 
 
 
7195864
62c810f
 
7195864
62c810f
c5a9402
af413b3
 
62c810f
 
c5a9402
62c810f
 
5931289
62c810f
 
c5a9402
62c810f
88aceaf
 
62c810f
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# config.yaml for LiteLLM - Replicate Models Only

model_list:
  # ---------------------------- Replicate models ----------------------------
  # Every entry in this list points at a Replicate-hosted model and takes its
  # credential from the REPLICATE_API_KEY environment variable.
  #
  # The documented 'model' format is
  #   'replicate/<owner>/<model-name>:<version-id>'
  # None of the entries below carry the ':<version-id>' suffix.
  # NOTE(review): presumably the suffix is optional - confirm before relying
  # on it. The full model path is shown on replicate.com.

  # Claude 4.5 Sonnet. This is the only alias without the 'rep/' prefix.
  # NOTE(review): check that this exact model path is available on Replicate.
  - model_name: 'claude-4.5-sonnet'
    litellm_params:
      model: 'replicate/anthropic/claude-4.5-sonnet'
      api_key: 'os.environ/REPLICATE_API_KEY'

  # Llama 3 70B Instruct
  - model_name: 'rep/llama-3-70b'
    litellm_params:
      model: 'replicate/meta/meta-llama-3-70b-instruct'
      api_key: 'os.environ/REPLICATE_API_KEY'

  # Mixtral 8x7B Instruct
  - model_name: 'rep/mixtral-8x7b'
    litellm_params:
      model: 'replicate/mistralai/mixtral-8x7b-instruct-v0.1'
      api_key: 'os.environ/REPLICATE_API_KEY'

  # Claude 3 Haiku
  # NOTE(review): this path is an example; verify it exists on Replicate.
  - model_name: 'rep/claude-3-haiku'
    litellm_params:
      model: 'replicate/anthropic/claude-3-haiku-20240307'
      api_key: 'os.environ/REPLICATE_API_KEY'

  # Stable Diffusion XL. Unlike the entries above, this is an image model.
  # NOTE(review): confirm the proxy can serve an image model from this list
  # without extra per-model settings.
  - model_name: 'rep/sdxl'
    litellm_params:
      model: 'replicate/stability-ai/sdxl'
      api_key: 'os.environ/REPLICATE_API_KEY'

# -------------- LiteLLM Settings --------------------

litellm_settings:
  # --- Networking ---
  # Replicate models can cold-start, so the request timeout is set high.
  # Value is an int, in seconds: 600 s = 10 minutes per LLM request.
  request_timeout: 600
  num_retries: 3
  # Put a model into cooldown once it fails more than 3 calls in a minute.
  allowed_fails: 3
  # How long a model stays in cooldown.
  # NOTE(review): presumably seconds - confirm against the LiteLLM docs.
  cooldown_time: 30
  # NOTE(review): per the LiteLLM docs this drops request parameters the
  # target provider does not support - confirm.
  drop_params: true
  # Optional: fallbacks between Replicate models can be added here, e.g.:
  # fallbacks: [{ "rep/llama-3-70b": ["rep/mixtral-8x7b"] }]

# -------------- Router Settings --------------------

router_settings:
  # History: the provider-specific fallbacks and aliases of the earlier
  # configuration were removed; only its routing strategy was carried over.
  routing_strategy: 'simple-shuffle'
  # Replicate-specific fallbacks can be enabled by uncommenting and adapting:
  # fallbacks:
  #   [
  #     { "rep/llama-3-70b": ["rep/mixtral-8x7b"] },
  #   ]

# -------------- General Settings --------------------

general_settings:
  # [RECOMMENDED] Secure the proxy with a master key. The value refers to
  # the MASTER_KEY environment variable rather than embedding the secret.
  master_key: 'os.environ/MASTER_KEY'