Spaces:
Sleeping
Sleeping
| import json | |
| from typing import Any, Dict, List | |
| from distilabel.steps.tasks.typing import ChatType | |
| from distilabel.steps.tasks.text_generation import TextGeneration | |
| from distilabel.steps import StepInput, StepOutput, Step | |
| from dotenv import load_dotenv | |
| from defaults import ( | |
| DEFAULT_DOMAIN, | |
| DEFAULT_PERSPECTIVES, | |
| DEFAULT_TOPICS, | |
| DEFAULT_EXAMPLES, | |
| DEFAULT_SYSTEM_PROMPT, | |
| N_PERSPECTIVES, | |
| N_TOPICS, | |
| N_EXAMPLES, | |
| ) | |
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Application description used for SelfInstruct.
# NOTE(review): the prompt wording contains typos ("than generates",
# "Your should"). It is left untouched here because editing the text changes
# the prompt sent to the model - confirm before correcting.
APPLICATION_DESCRIPTION = "\n".join(
    (
        f"You are an AI assistant than generates queries around the domain of {DEFAULT_DOMAIN}.",
        "Your should not expect basic but profound questions from your users.",
        "The queries should reflect a diversity of vision and economic positions and political positions.",
        f"The queries may know about different methods of {DEFAULT_DOMAIN}.",
        "The queries can be positioned politically, economically, socially, or practically.",
        "Also take into account the impact of diverse causes on diverse domains.",
    )
)

# Keep only the leading N_* entries of each default list.
TOPICS = DEFAULT_TOPICS[0:N_TOPICS]
PERSPECTIVES = DEFAULT_PERSPECTIVES[0:N_PERSPECTIVES]
EXAMPLES = DEFAULT_EXAMPLES[0:N_EXAMPLES]
def create_examples_template(examples: List[Dict[str, str]]) -> str:
    """Build the user-prompt template that embeds few-shot examples.

    The returned string keeps a literal ``{instruction}`` placeholder, so it
    is meant to be filled in later with ``str.format`` (the way
    ``DomainExpert.format_input`` fills its ``_template``).

    Args:
        examples: Mappings that each provide a ``"question"`` and an
            ``"answer"`` entry.

    Returns:
        One template string: the instruction header, followed by every
        example question, followed by every example answer.

    Raises:
        KeyError: If an example lacks the ``"question"`` or ``"answer"`` key.
    """

    def _literal(text: str) -> str:
        # Double the braces so example text survives a later ``str.format``
        # call verbatim instead of being parsed as a replacement field.
        return str(text).replace("{", "{{").replace("}", "}}")

    # Collect the pieces and join once rather than growing strings with +=.
    question_parts = [" Examples of high quality questions:"]
    answer_parts = [" Examples of high quality answers:"]
    for example in examples:
        question_parts.append(f"\n- Question: {_literal(example['question'])}\n")
        answer_parts.append(f"\n- Answer: {_literal(example['answer'])}\n")

    return (
        "{instruction}\nThis is the the instruction.\n Examples: "
        + "".join(question_parts)
        + "".join(answer_parts)
    )
def create_topics(topics: List[str], positions: List[str]) -> List[str]:
    """Combine every topic with every position into a seed phrase.

    Args:
        topics: Subject names; they drive the outer iteration.
        positions: Viewpoints applied to each topic, in order.

    Returns:
        Strings of the form ``"<topic> from a <position> perspective"``,
        grouped by topic and ordered by position within each topic.
    """
    combined: List[str] = []
    for subject in topics:
        for stance in positions:
            combined.append(f"{subject} from a {stance} perspective")
    return combined
class DomainExpert(TextGeneration):
    """Text-generation task that prompts the model to answer as a domain expert.

    The system message is ``DEFAULT_SYSTEM_PROMPT``; the user message is
    ``_template`` filled in with the fields of the incoming row.
    """

    # System message sent with every request.
    _system_prompt: str = DEFAULT_SYSTEM_PROMPT
    # ``str.format`` template for the user message; expects an ``instruction`` field.
    _template: str = """{instruction}\nThis is the the instruction.\n Examples: """

    def format_input(self, input: Dict[str, Any]) -> "ChatType":
        """Turn one input row into a two-message chat (system, then user).

        Args:
            input: Row whose entries are passed as keyword arguments to
                ``_template.format``.

        Returns:
            A list holding the system message followed by the user message.
        """
        system_message = {"role": "system", "content": self._system_prompt}
        user_message = {"role": "user", "content": self._template.format(**input)}
        return [system_message, user_message]
class CleanNumberedList(Step):
    """Step that strips a leading list number such as ``"1. "`` from each question."""

    def process(self, inputs: StepInput) -> StepOutput:
        """Remove the numbered-list prefix from every row's ``"question"``.

        Rows are modified in place and the same batch object is yielded once.

        Args:
            inputs: Batch of rows, each carrying a ``"question"`` string.

        Yields:
            The input batch after its questions have been cleaned.
        """
        import re

        # Digits, a dot and one whitespace character, only at the very start.
        leading_number = re.compile(r"^\d+\.\s")
        for row in inputs:
            row["question"] = leading_number.sub("", row["question"])
        yield inputs