Spaces:

Mqleet
/

AutoPage

Running

App Files Community

AutoPage/camel/schemas/outlines_converter.py

Mqleet

upd code

fcaa164 30 days ago

raw

history blame

8.74 kB

	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

	from typing import Any, Callable, List, Literal, Type, Union

	from pydantic import BaseModel

	from .base import BaseConverter


	class OutlinesConverter(BaseConverter):
	r"""OutlinesConverter is a class that converts a string or a function
	into a BaseModel schema.

	Args:
	model_type (str, optional): The model type to be used.
	platform (str, optional): The platform to be used.
	1. transformers
	2. mamba
	3. vllm
	4. llamacpp
	5. mlx
	(default: "transformers")
	**kwargs: The keyword arguments to be used. See the outlines
	documentation for more details. See
	https://dottxt-ai.github.io/outlines/latest/reference/models/models/
	"""

	def __init__(
	self,
	model_type: str,
	platform: Literal[
	"vllm", "transformers", "mamba", "llamacpp", "mlx"
	] = "transformers",
	**kwargs: Any,
	):
	self.model_type = model_type
	from outlines import models

	match platform:
	case "vllm":
	self._outlines_model = models.vllm(model_type, **kwargs)
	case "transformers":
	self._outlines_model = models.transformers(
	model_type, **kwargs
	)
	case "mamba":
	self._outlines_model = models.mamba(model_type, **kwargs)
	case "llamacpp":
	self._outlines_model = models.llamacpp(model_type, **kwargs)
	case "mlx":
	self._outlines_model = models.mlxlm(model_type, **kwargs)
	case _:
	raise ValueError(f"Unsupported platform: {platform}")

	def convert_regex(self, content: str, regex_pattern: str) -> str:
	r"""Convert the content to the specified regex pattern.

	Args:
	content (str): The content to be converted.
	regex_pattern (str): The regex pattern to be used.

	Returns:
	str: The converted content.
	"""
	import outlines

	regex_generator = outlines.generate.regex(
	self._outlines_model, regex_pattern
	)
	return regex_generator(content)

	def convert_json(
	self,
	content: str,
	output_schema: Union[str, Callable],
	) -> dict:
	r"""Convert the content to the specified JSON schema given by
	output_schema.

	Args:
	content (str): The content to be converted.
	output_schema (Union[str, Callable]): The expected format of the
	response.

	Returns:
	dict: The converted content in JSON format.
	"""
	import outlines

	json_generator = outlines.generate.json(
	self._outlines_model, output_schema
	)
	return json_generator(content)

	def convert_pydantic(
	self,
	content: str,
	output_schema: Type[BaseModel],
	) -> BaseModel:
	r"""Convert the content to the specified Pydantic schema.

	Args:
	content (str): The content to be converted.
	output_schema (Type[BaseModel]): The expected format of the
	response.

	Returns:
	BaseModel: The converted content in pydantic model format.
	"""
	import outlines

	json_generator = outlines.generate.json(
	self._outlines_model, output_schema
	)
	return json_generator(content)

	def convert_type(self, content: str, type_name: type) -> str:
	r"""Convert the content to the specified type.

	The following types are currently available:
	1. int
	2. float
	3. bool
	4. datetime.date
	5. datetime.time
	6. datetime.datetime
	7. custom types (https://dottxt-ai.github.io/outlines/latest/reference/generation/types/)

	Args:
	content (str): The content to be converted.
	type_name (type): The type to be used.

	Returns:
	str: The converted content.
	"""
	import outlines

	type_generator = outlines.generate.format(
	self._outlines_model, type_name
	)
	return type_generator(content)

	def convert_choice(self, content: str, choices: List[str]) -> str:
	r"""Convert the content to the specified choice.

	Args:
	content (str): The content to be converted.
	choices (List[str]): The choices to be used.

	Returns:
	str: The converted content.
	"""
	import outlines

	choices_generator = outlines.generate.choice(
	self._outlines_model, choices
	)
	return choices_generator(content)

	def convert_grammar(self, content: str, grammar: str) -> str:
	r"""Convert the content to the specified grammar.

	Args:
	content (str): The content to be converted.
	grammar (str): The grammar to be used.

	Returns:
	str: The converted content.
	"""
	import outlines

	grammar_generator = outlines.generate.cfg(
	self._outlines_model, grammar
	)
	return grammar_generator(content)

	def convert( # type: ignore[override]
	self,
	content: str,
	type: Literal["regex", "json", "type", "choice", "grammar"],
	**kwargs,
	) -> Any:
	r"""Formats the input content into the expected BaseModel.

	Args:
	type (Literal["regex", "json", "type", "choice", "grammar"]):
	The type of conversion to perform. Options are:
	- "regex": Match the content against a regex pattern.
	- "pydantic": Convert the content into a pydantic model.
	- "json": Convert the content into a JSON based on a
	schema.
	- "type": Convert the content into a specified type.
	- "choice": Match the content against a list of valid
	choices.
	- "grammar": Convert the content using a specified grammar.
	content (str): The content to be formatted.
	**kwargs: Additional keyword arguments specific to the conversion
	type.

	- For "regex":
	regex_pattern (str): The regex pattern to use for matching.

	- For "pydantic":
	output_schema (Type[BaseModel]): The schema to validate and
	format the pydantic model.

	- For "json":
	output_schema (Union[str, Callable]): The schema to validate
	and format the JSON object.

	- For "type":
	type_name (str): The target type name for the conversion.

	- For "choice":
	choices (List[str]): A list of valid choices to match against.

	- For "grammar":
	grammar (str): The grammar definition to use for content
	conversion.
	"""
	match type:
	case "regex":
	return self.convert_regex(content, kwargs.get("regex_pattern")) # type: ignore[arg-type]
	case "pydantic":
	return self.convert_pydantic(
	content, kwargs.get("output_schema")
	) # type: ignore[arg-type]
	case "json":
	return self.convert_json(content, kwargs.get("output_schema")) # type: ignore[arg-type]
	case "type":
	return self.convert_type(content, kwargs.get("type_name")) # type: ignore[arg-type]
	case "choice":
	return self.convert_choice(content, kwargs.get("choices")) # type: ignore[arg-type]
	case "grammar":
	return self.convert_grammar(content, kwargs.get("grammar")) # type: ignore[arg-type]
	case _:
	raise ValueError("Unsupported output schema type")