File size: 5,034 Bytes
3b993c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from dataclasses import dataclass
from enum import Enum


@dataclass
class RateLimit:
    """
    Dataclass representing rate limit information for Bedrock models.

    Attributes:
        requests_per_minute (int): The maximum number of requests allowed per minute.
        tokens_per_minute (int | None): The maximum number of tokens allowed per minute, if applicable.
        regions (str): The AWS regions where this rate limit applies.
    """

    requests_per_minute: int
    tokens_per_minute: int | None
    regions: str


class BedrockModel(Enum):
    """
    Base enum class for Bedrock models.

    The class declares no members of its own. Concrete provider enums subclass
    it, list their model identifiers as members, and override ``_rate_limits``
    to supply a per-member rate limit table.

    Methods:
        get_rate_limit(model: BedrockModel) -> RateLimit:
            Get the rate limit for a specific model.
    """

    @classmethod
    def get_rate_limit(cls, model: "BedrockModel") -> "RateLimit":
        """
        Get the rate limit for a specific Bedrock model.

        The lookup table is taken from the enum class that ``model`` belongs
        to, so the call gives the same answer whether it is made on the base
        class, on the model's own enum, or on a sibling enum.

        Args:
            model (BedrockModel): The Bedrock model to get the rate limit for.

        Returns:
            RateLimit: The rate limit information for the specified model.

        Raises:
            KeyError: If the table consulted has no entry for ``model``.
            NotImplementedError: If the class supplying the table does not
                override ``_rate_limits``.
        """
        # Dispatch on the member's own enum class: looking the member up in
        # ``cls``'s table fails whenever ``cls`` is the base class or a
        # different provider enum. Non-member inputs keep using ``cls``.
        table_owner = type(model) if isinstance(model, BedrockModel) else cls
        return table_owner._rate_limits()[model]

    @classmethod
    def _rate_limits(cls) -> dict["BedrockModel", "RateLimit"]:
        """
        Define the rate limits for each Bedrock model.

        This method must be implemented by subclasses.

        Returns:
            dict[BedrockModel, RateLimit]: A dictionary mapping each model to its rate limit.

        Raises:
            NotImplementedError: If not implemented by a subclass.
        """
        raise NotImplementedError("Subclasses must implement this method")


class AnthropicModel(BedrockModel):
    """
    Anthropic models available through Bedrock.

    Each member's value is the identifier string of one Claude model version.
    """

    CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0"
    CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0"
    CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"
    CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0"
    CLAUDE_INSTANT_1_2 = "anthropic.claude-instant-v1"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate limit table for the Anthropic models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Kept as locals: a plain class-level assignment inside an Enum body
        # would be turned into an extra enum member.
        everywhere = "All"
        us_east_west = "US East (N. Virginia) (us-east-1), US West (Oregon) (us-west-2)"

        table = {}
        table[cls.CLAUDE_3_OPUS] = RateLimit(
            requests_per_minute=50, tokens_per_minute=400_000, regions=everywhere
        )
        table[cls.CLAUDE_3_5_SONNET] = RateLimit(
            requests_per_minute=50, tokens_per_minute=400_000, regions=everywhere
        )
        table[cls.CLAUDE_3_SONNET] = RateLimit(
            requests_per_minute=500, tokens_per_minute=1_000_000, regions=us_east_west
        )
        table[cls.CLAUDE_3_HAIKU] = RateLimit(
            requests_per_minute=1000, tokens_per_minute=2_000_000, regions=us_east_west
        )
        table[cls.CLAUDE_INSTANT_1_2] = RateLimit(
            requests_per_minute=1000, tokens_per_minute=1_000_000, regions=us_east_west
        )
        return table


class MetaModel(BedrockModel):
    """
    Meta models available through Bedrock.

    Each member's value is the identifier string of one Llama model version.
    """

    LLAMA_2_70B_CHAT = "meta.llama2-70b-chat-v1"
    LLAMA_2_13B_CHAT = "meta.llama2-13b-chat-v1"
    LLAMA_3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0"
    LLAMA_3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate limit table for the Meta models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Only the request quota differs between these models; the token quota
        # and the region description are the same for every entry.
        requests_by_model = (
            (cls.LLAMA_2_70B_CHAT, 400),
            (cls.LLAMA_2_13B_CHAT, 800),
            (cls.LLAMA_3_8B_INSTRUCT, 800),
            (cls.LLAMA_3_70B_INSTRUCT, 400),
        )
        return {
            member: RateLimit(rpm, 300_000, "All")
            for member, rpm in requests_by_model
        }


class MistralModel(BedrockModel):
    """
    Mistral models available through Bedrock.

    Each member's value is the identifier string of one Mistral or Mixtral
    model version.
    """

    MISTRAL_7B_INSTRUCT = "mistral.mistral-7b-instruct-v0:2"
    MIXTRAL_8X7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1"
    MISTRAL_LARGE = "mistral.mistral-large-2402-v1:0"
    MISTRAL_SMALL = "mistral.mistral-small-2402-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        """
        Build the rate limit table for the Mistral models.

        Returns:
            dict[BedrockModel, RateLimit]: One entry per member of this enum.
        """
        # Token quota and region description are identical across the table.
        shared = {"tokens_per_minute": 300_000, "regions": "All"}
        return {
            cls.MISTRAL_7B_INSTRUCT: RateLimit(requests_per_minute=800, **shared),
            cls.MIXTRAL_8X7B_INSTRUCT: RateLimit(requests_per_minute=400, **shared),
            cls.MISTRAL_LARGE: RateLimit(requests_per_minute=400, **shared),
            cls.MISTRAL_SMALL: RateLimit(requests_per_minute=400, **shared),
        }


# Function to get rate limit for any Bedrock model
def get_bedrock_model_rate_limit(model: BedrockModel) -> RateLimit:
    """
    Look up the rate limit for any Bedrock model.

    Convenience wrapper: resolves the enum class that ``model`` is a member of
    and delegates to that class's ``get_rate_limit`` method.

    Args:
        model (BedrockModel): The Bedrock model to get the rate limit for.

    Returns:
        RateLimit: The rate limit information for the specified model.
    """
    model_enum = type(model)
    rate_limit = model_enum.get_rate_limit(model)
    return rate_limit