File size: 5,034 Bytes
3b993c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
from dataclasses import dataclass
from enum import Enum
@dataclass
class RateLimit:
"""
Dataclass representing rate limit information for Bedrock models.
Attributes:
requests_per_minute (int): The maximum number of requests allowed per minute.
tokens_per_minute (int | None): The maximum number of tokens allowed per minute, if applicable.
regions (str): The AWS regions where this rate limit applies.
"""
requests_per_minute: int
tokens_per_minute: int | None
regions: str
class BedrockModel(Enum):
    """
    Base enum class for Bedrock models.

    This class declares no members of its own. Concrete model families
    subclass it, list their models as enum members, and override
    ``_rate_limits`` to map each member to its rate limit.

    Methods:
        get_rate_limit(model: BedrockModel) -> RateLimit:
            Get the rate limit for a specific model.
    """

    @classmethod
    def get_rate_limit(cls, model: "BedrockModel") -> "RateLimit":
        """
        Get the rate limit for a specific Bedrock model.

        The lookup table is taken from the enum class that ``model`` is a
        member of, so the result is the same whether this method is called
        on the base class, on the member's own class, or on a sibling
        class.

        Args:
            model (BedrockModel): The Bedrock model to get the rate limit for.

        Returns:
            RateLimit: The rate limit information for the specified model.

        Raises:
            KeyError: If the consulted table has no entry for ``model``.
            NotImplementedError: If the consulted class does not override
                ``_rate_limits``.
        """
        # Dispatch on the member's own class: looking the member up in
        # ``cls``'s table fails whenever ``cls`` is the base class or a
        # different model family. Non-member arguments keep the previous
        # behaviour of being looked up in ``cls``'s table.
        owner = type(model) if isinstance(model, BedrockModel) else cls
        return owner._rate_limits()[model]

    @classmethod
    def _rate_limits(cls) -> dict["BedrockModel", "RateLimit"]:
        """
        Define the rate limits for each Bedrock model.

        This method must be implemented by subclasses.

        Returns:
            dict[BedrockModel, RateLimit]: A dictionary mapping each model
            to its rate limit.

        Raises:
            NotImplementedError: If not implemented by a subclass.
        """
        raise NotImplementedError("Subclasses must implement this method")
class AnthropicModel(BedrockModel):
    """
    Anthropic models available through Bedrock.

    Members cover several Claude versions; each member's value is the
    model identifier string for that version.
    """

    CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0"
    CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0"
    CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"
    CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0"
    CLAUDE_INSTANT_1_2 = "anthropic.claude-instant-v1"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate-limit table for the Claude models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Three of the entries share this exact region description.
        us_east_west = "US East (N. Virginia) (us-east-1), US West (Oregon) (us-west-2)"
        return {
            cls.CLAUDE_3_OPUS: RateLimit(
                requests_per_minute=50,
                tokens_per_minute=400_000,
                regions="All",
            ),
            cls.CLAUDE_3_5_SONNET: RateLimit(
                requests_per_minute=50,
                tokens_per_minute=400_000,
                regions="All",
            ),
            cls.CLAUDE_3_SONNET: RateLimit(
                requests_per_minute=500,
                tokens_per_minute=1_000_000,
                regions=us_east_west,
            ),
            cls.CLAUDE_3_HAIKU: RateLimit(
                requests_per_minute=1000,
                tokens_per_minute=2_000_000,
                regions=us_east_west,
            ),
            cls.CLAUDE_INSTANT_1_2: RateLimit(
                requests_per_minute=1000,
                tokens_per_minute=1_000_000,
                regions=us_east_west,
            ),
        }
class MetaModel(BedrockModel):
    """
    Meta models available through Bedrock.

    Members cover several Llama versions; each member's value is the
    model identifier string for that version.
    """

    LLAMA_2_70B_CHAT = "meta.llama2-70b-chat-v1"
    LLAMA_2_13B_CHAT = "meta.llama2-13b-chat-v1"
    LLAMA_3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0"
    LLAMA_3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate-limit table for the Llama models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Only the request quota differs between entries; the token quota
        # and the region description are the same for every model here.
        requests_by_model = {
            cls.LLAMA_2_70B_CHAT: 400,
            cls.LLAMA_2_13B_CHAT: 800,
            cls.LLAMA_3_8B_INSTRUCT: 800,
            cls.LLAMA_3_70B_INSTRUCT: 400,
        }
        return {
            member: RateLimit(per_minute, 300_000, "All")
            for member, per_minute in requests_by_model.items()
        }
class MistralModel(BedrockModel):
    """
    Mistral models available through Bedrock.

    Members cover several Mistral and Mixtral versions; each member's
    value is the model identifier string for that version.
    """

    MISTRAL_7B_INSTRUCT = "mistral.mistral-7b-instruct-v0:2"
    MIXTRAL_8X7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1"
    MISTRAL_LARGE = "mistral.mistral-large-2402-v1:0"
    MISTRAL_SMALL = "mistral.mistral-small-2402-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate-limit table for the Mistral models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Only the request quota differs between entries; the token quota
        # and the region description are the same for every model here.
        requests_by_model = {
            cls.MISTRAL_7B_INSTRUCT: 800,
            cls.MIXTRAL_8X7B_INSTRUCT: 400,
            cls.MISTRAL_LARGE: 400,
            cls.MISTRAL_SMALL: 400,
        }
        return {
            member: RateLimit(per_minute, 300_000, "All")
            for member, per_minute in requests_by_model.items()
        }
# Module-level shortcut for looking up the rate limit of any Bedrock model
def get_bedrock_model_rate_limit(model: BedrockModel) -> RateLimit:
    """
    Look up the rate limit of a Bedrock model.

    Convenience wrapper: it resolves the enum class that ``model`` belongs
    to and calls that class's ``get_rate_limit`` with the model.

    Args:
        model (BedrockModel): The Bedrock model to get the rate limit for.

    Returns:
        RateLimit: The rate limit information for the specified model.
    """
    model_family = type(model)
    return model_family.get_rate_limit(model)
|