|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Any, Callable, List, Literal, Type, Union |
|
|
|
|
|
from pydantic import BaseModel |
|
|
|
|
|
from .base import BaseConverter |
|
|
|
|
|
|
|
|
class OutlinesConverter(BaseConverter): |
|
|
r"""OutlinesConverter is a class that converts a string or a function |
|
|
into a BaseModel schema. |
|
|
|
|
|
Args: |
|
|
model_type (str, optional): The model type to be used. |
|
|
platform (str, optional): The platform to be used. |
|
|
1. transformers |
|
|
2. mamba |
|
|
3. vllm |
|
|
4. llamacpp |
|
|
5. mlx |
|
|
(default: "transformers") |
|
|
**kwargs: The keyword arguments to be used. See the outlines |
|
|
documentation for more details. See |
|
|
https://dottxt-ai.github.io/outlines/latest/reference/models/models/ |
|
|
""" |
|
|
|
|
|
def __init__( |
|
|
self, |
|
|
model_type: str, |
|
|
platform: Literal[ |
|
|
"vllm", "transformers", "mamba", "llamacpp", "mlx" |
|
|
] = "transformers", |
|
|
**kwargs: Any, |
|
|
): |
|
|
self.model_type = model_type |
|
|
from outlines import models |
|
|
|
|
|
match platform: |
|
|
case "vllm": |
|
|
self._outlines_model = models.vllm(model_type, **kwargs) |
|
|
case "transformers": |
|
|
self._outlines_model = models.transformers( |
|
|
model_type, **kwargs |
|
|
) |
|
|
case "mamba": |
|
|
self._outlines_model = models.mamba(model_type, **kwargs) |
|
|
case "llamacpp": |
|
|
self._outlines_model = models.llamacpp(model_type, **kwargs) |
|
|
case "mlx": |
|
|
self._outlines_model = models.mlxlm(model_type, **kwargs) |
|
|
case _: |
|
|
raise ValueError(f"Unsupported platform: {platform}") |
|
|
|
|
|
def convert_regex(self, content: str, regex_pattern: str) -> str: |
|
|
r"""Convert the content to the specified regex pattern. |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
regex_pattern (str): The regex pattern to be used. |
|
|
|
|
|
Returns: |
|
|
str: The converted content. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
regex_generator = outlines.generate.regex( |
|
|
self._outlines_model, regex_pattern |
|
|
) |
|
|
return regex_generator(content) |
|
|
|
|
|
def convert_json( |
|
|
self, |
|
|
content: str, |
|
|
output_schema: Union[str, Callable], |
|
|
) -> dict: |
|
|
r"""Convert the content to the specified JSON schema given by |
|
|
output_schema. |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
output_schema (Union[str, Callable]): The expected format of the |
|
|
response. |
|
|
|
|
|
Returns: |
|
|
dict: The converted content in JSON format. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
json_generator = outlines.generate.json( |
|
|
self._outlines_model, output_schema |
|
|
) |
|
|
return json_generator(content) |
|
|
|
|
|
def convert_pydantic( |
|
|
self, |
|
|
content: str, |
|
|
output_schema: Type[BaseModel], |
|
|
) -> BaseModel: |
|
|
r"""Convert the content to the specified Pydantic schema. |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
output_schema (Type[BaseModel]): The expected format of the |
|
|
response. |
|
|
|
|
|
Returns: |
|
|
BaseModel: The converted content in pydantic model format. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
json_generator = outlines.generate.json( |
|
|
self._outlines_model, output_schema |
|
|
) |
|
|
return json_generator(content) |
|
|
|
|
|
def convert_type(self, content: str, type_name: type) -> str: |
|
|
r"""Convert the content to the specified type. |
|
|
|
|
|
The following types are currently available: |
|
|
1. int |
|
|
2. float |
|
|
3. bool |
|
|
4. datetime.date |
|
|
5. datetime.time |
|
|
6. datetime.datetime |
|
|
7. custom types (https://dottxt-ai.github.io/outlines/latest/reference/generation/types/) |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
type_name (type): The type to be used. |
|
|
|
|
|
Returns: |
|
|
str: The converted content. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
type_generator = outlines.generate.format( |
|
|
self._outlines_model, type_name |
|
|
) |
|
|
return type_generator(content) |
|
|
|
|
|
def convert_choice(self, content: str, choices: List[str]) -> str: |
|
|
r"""Convert the content to the specified choice. |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
choices (List[str]): The choices to be used. |
|
|
|
|
|
Returns: |
|
|
str: The converted content. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
choices_generator = outlines.generate.choice( |
|
|
self._outlines_model, choices |
|
|
) |
|
|
return choices_generator(content) |
|
|
|
|
|
def convert_grammar(self, content: str, grammar: str) -> str: |
|
|
r"""Convert the content to the specified grammar. |
|
|
|
|
|
Args: |
|
|
content (str): The content to be converted. |
|
|
grammar (str): The grammar to be used. |
|
|
|
|
|
Returns: |
|
|
str: The converted content. |
|
|
""" |
|
|
import outlines |
|
|
|
|
|
grammar_generator = outlines.generate.cfg( |
|
|
self._outlines_model, grammar |
|
|
) |
|
|
return grammar_generator(content) |
|
|
|
|
|
def convert( |
|
|
self, |
|
|
content: str, |
|
|
type: Literal["regex", "json", "type", "choice", "grammar"], |
|
|
**kwargs, |
|
|
) -> Any: |
|
|
r"""Formats the input content into the expected BaseModel. |
|
|
|
|
|
Args: |
|
|
type (Literal["regex", "json", "type", "choice", "grammar"]): |
|
|
The type of conversion to perform. Options are: |
|
|
- "regex": Match the content against a regex pattern. |
|
|
- "pydantic": Convert the content into a pydantic model. |
|
|
- "json": Convert the content into a JSON based on a |
|
|
schema. |
|
|
- "type": Convert the content into a specified type. |
|
|
- "choice": Match the content against a list of valid |
|
|
choices. |
|
|
- "grammar": Convert the content using a specified grammar. |
|
|
content (str): The content to be formatted. |
|
|
**kwargs: Additional keyword arguments specific to the conversion |
|
|
type. |
|
|
|
|
|
- For "regex": |
|
|
regex_pattern (str): The regex pattern to use for matching. |
|
|
|
|
|
- For "pydantic": |
|
|
output_schema (Type[BaseModel]): The schema to validate and |
|
|
format the pydantic model. |
|
|
|
|
|
- For "json": |
|
|
output_schema (Union[str, Callable]): The schema to validate |
|
|
and format the JSON object. |
|
|
|
|
|
- For "type": |
|
|
type_name (str): The target type name for the conversion. |
|
|
|
|
|
- For "choice": |
|
|
choices (List[str]): A list of valid choices to match against. |
|
|
|
|
|
- For "grammar": |
|
|
grammar (str): The grammar definition to use for content |
|
|
conversion. |
|
|
""" |
|
|
match type: |
|
|
case "regex": |
|
|
return self.convert_regex(content, kwargs.get("regex_pattern")) |
|
|
case "pydantic": |
|
|
return self.convert_pydantic( |
|
|
content, kwargs.get("output_schema") |
|
|
) |
|
|
case "json": |
|
|
return self.convert_json(content, kwargs.get("output_schema")) |
|
|
case "type": |
|
|
return self.convert_type(content, kwargs.get("type_name")) |
|
|
case "choice": |
|
|
return self.convert_choice(content, kwargs.get("choices")) |
|
|
case "grammar": |
|
|
return self.convert_grammar(content, kwargs.get("grammar")) |
|
|
case _: |
|
|
raise ValueError("Unsupported output schema type") |
|
|
|