Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| from defaults import ( | |
| PROJECT_NAME, | |
| ARGILLA_URL, | |
| DIBT_PARENT_APP_URL, | |
| DATASET_URL, | |
| DATASET_REPO_ID, | |
| ) | |
| def project_sidebar(): | |
| if PROJECT_NAME == "DEFAULT_DOMAIN": | |
| st.warning( | |
| "Please set up the project configuration in the parent app before proceeding." | |
| ) | |
| st.stop() | |
| st.sidebar.subheader(f"A Data Growing Project in the domain of {PROJECT_NAME}") | |
| st.sidebar.markdown( | |
| """ | |
| This space helps you create a dataset seed for building diverse domain-specific datasets for aligning models. | |
| """ | |
| ) | |
| st.sidebar.link_button(f"π Dataset Repo", DATASET_URL) | |
| st.sidebar.link_button(f"π€ Argilla Space", ARGILLA_URL) | |
| hub_username = DATASET_REPO_ID.split("/")[0] | |
| project_name = DATASET_REPO_ID.split("/")[1] | |
| st.session_state["project_name"] = project_name | |
| st.session_state["hub_username"] = hub_username | |
| st.session_state["hub_token"] = st.sidebar.text_input( | |
| "Hub Token", type="password", value=os.environ.get("HF_TOKEN", None) | |
| ) | |
| if st.session_state["hub_token"] is not None: | |
| os.environ["HF_TOKEN"] = st.session_state["hub_token"] | |
| st.sidebar.link_button( | |
| "π€ Get your Hub Token", "https://huggingface.co/settings/tokens" | |
| ) | |
| if all( | |
| ( | |
| st.session_state.get("project_name"), | |
| st.session_state.get("hub_username"), | |
| st.session_state.get("hub_token"), | |
| ) | |
| ): | |
| st.success(f"Using the dataset repo {hub_username}/{project_name} on the Hub") | |
| st.sidebar.divider() | |
| st.sidebar.link_button("π§βπΎ New Project", DIBT_PARENT_APP_URL) | |
| if st.session_state["hub_token"] is None: | |
| st.error("Please provide a Hub token to generate answers") | |
| st.stop() | |
| def create_seed_terms(topics: list[str], perspectives: list[str]) -> list[str]: | |
| """Create seed terms for self intruct to start from.""" | |
| return [ | |
| f"{topic} from a {perspective} perspective" | |
| for topic in topics | |
| for perspective in perspectives | |
| ] | |
| def create_application_instruction( | |
| domain: str, system_prompt: str, examples: list[dict[str, str]] | |
| ) -> str: | |
| """Create the instruction for Self-Instruct task.""" | |
| system_prompt = f"""AI assistant in the domain of {domain}. {system_prompt}""" | |
| examples_str = "" | |
| for example in examples: | |
| question = example["question"] | |
| answer = example["answer"] | |
| if len(answer) and len(question): | |
| examples_str += f"""\n- Question: {question}\n- Answer: {answer}\n""" | |
| examples_str += f"""\n- Question: {question}\n- Answer: {answer}\n""" | |
| if len(examples_str): | |
| system_prompt += """Below are some examples of questions and answers \ | |
| that the AI assistant would generate:""" | |
| system_prompt += "\nExamples:" | |
| system_prompt += f"\n{examples_str}" | |
| return system_prompt | |