Spaces:

arcee-ai
/

mergekit-config-generator

Running

App Files Files Community

julien-c HF Staff committed on Apr 9, 2024

Commit

cde73c1

verified ·

1 Parent(s): 049ec21

simplify + only handle yaml generation

Browse files

Files changed (2) hide show

.vscode/settings.json +2 -1
app.py +77 -133

.vscode/settings.json CHANGED Viewed

@@ -1,3 +1,4 @@
 {
-	"editor.formatOnSave": true
 }

 {
+	"editor.formatOnSave": true,
+	"editor.defaultFormatter": "ms-python.black-formatter"
 }

app.py CHANGED Viewed

@@ -1,11 +1,23 @@
 import gradio as gr
-import os
-from huggingface_hub import HfApi
-import subprocess
 def create_config_yaml(
-    model_name,
     model1,
     model1_layers,
     model2,
@@ -14,140 +26,72 @@ def create_config_yaml(
     base_model,
     parameters,
     dtype,
-):
-    yaml_config = (
-        f"  slices:\n"
-        "    - sources:\n"
-        f"        - model: {model1}\n"
-        f"          layer_range: {model1_layers}\n"
-        f"        - model: {model2}\n"
-        f"          layer_range: {model2_layers}\n"
-        f"  merge_method: {merge_method}\n"
-        f"  base_model: {base_model}\n"
-        f"  parameters:\n"
-        f" {parameters}\n"
-        f"  dtype: {dtype}\n"
-    )
-    print("Writing YAML config to 'config.yaml'...")
-    try:
-        with open("config.yaml", "w", encoding="utf-8") as f:
-            f.write(yaml_config)
-        print("File 'config.yaml' written successfully.")
-    except Exception as e:
-        print(f"Error writing file: {e}")
-    return yaml_config
-def execute_merge_command():
-    # Define the command and arguments
-    command = "mergekit-yaml"
-    args = ["config.yaml", "./output-model-directory"]
-    # Execute the command
-    result = subprocess.run([command] + args, capture_output=True, text=True)
-    # Check if the command was executed successfully
-    if result.returncode == 0:
-        print("Command executed successfully")
-        return f"Output:\n{result.stdout}"
-    else:
-        print("Error in executing command")
-        return f"Error:\n{result.stderr}"
-# Function to push to HF Hub (for the third tab)
-def push_to_hf_hub(model_name, yaml_config):
-    # Username and API token setup
-    username = "arcee-ai"
-    api_token = os.getenv("HF_TOKEN")
-    if api_token is None:
-        return "Hugging Face API token not set. Please set the HF_TOKEN environment variable."
-    # Initialize HfApi with token
-    api = HfApi(token=api_token)
-    repo_id = f"{username}/{model_name}"
-    try:
-        # Create a new repository on Hugging Face
-        api.create_repo(repo_id=repo_id, repo_type="model")
-        # For demonstration, let's just create a yaml file inside a folder
-        # os.makedirs("merge", exist_ok=True)
-        with open("config.yaml", "w") as file:
-            file.write(yaml_config)
-        # Upload the contents of the 'merge' folder to the repository
-        api.upload_folder(repo_id=repo_id, folder_path="merge")
-        return f"Successfully pushed to HF Hub: {repo_id}"
-    except Exception as e:
-        return str(e)
 # make sure to add the themes as well
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as app:
-    gr.Markdown("# Mergekit GUI")  # Title for your Gradio app
-    with gr.Tab("Config YAML"):
-        # Inputs for the YAML config
-        with gr.Row():
-            model_name_input = gr.Textbox(label="Model Name")
-            model1_input = gr.Textbox(label="Model 1")
-            model1_layers_input = gr.Textbox(
-                label="Model 1 Layer Range", placeholder="[start, end]"
-            )
-            model2_input = gr.Textbox(label="Model 2")
-            model2_layers_input = gr.Textbox(
-                label="Model 2 Layer Range", placeholder="[start, end]"
-            )
-        merge_method_input = gr.Dropdown(
-            label="Merge Method", choices=["slerp", "linear"]
         )
-        base_model_input = gr.Textbox(label="Base Model")
-        parameters_input = gr.Textbox(
-            label="Parameters", placeholder="Formatted as a list of dicts"
         )
-        dtype_input = gr.Textbox(label="Data Type", value="bfloat16")
-        create_button = gr.Button("Create Config YAML")
-        create_button.click(
-            fn=create_config_yaml,
-            inputs=[
-                model_name_input,
-                model1_input,
-                model1_layers_input,
-                model2_input,
-                model2_layers_input,
-                merge_method_input,
-                base_model_input,
-                parameters_input,
-                dtype_input,
-            ],
-            outputs=[],
-        )
-    with gr.Tab("Merge"):
-        # Placeholder for Merge tab contents
-        # Not yet tested
-        merge_output = gr.Textbox(label="Merge Output", interactive=False)
-        merge_button = gr.Button("Execute Merge Command")
-        merge_button.click(fn=execute_merge_command, inputs=[], outputs=merge_output)
-    with gr.Tab("Push to HF Hub"):
-        push_model_name_input = gr.Textbox(label="Model Name", interactive=False)
-        push_yaml_config_input = gr.Textbox(label="YAML Config", interactive=False)
-        push_output = gr.Textbox(label="Push Output", interactive=False)
-        push_button = gr.Button("Push to HF Hub")
-        push_button.click(
-            fn=push_to_hf_hub,
-            inputs=[push_model_name_input, push_yaml_config_input],
-            outputs=push_output,
         )
-app.launch()

 import gradio as gr
+import yaml
+MARKDOWN_DESCRIPTION = """
+# mergekit config.yaml generator
+GUI to template a YAML configuration file for mergekit, which you can then copy/paste into [mergekit-gui](https://huggingface.co/spaces/arcee-ai/mergekit-gui) 🔥
+"""
+DEFAULT_PARAMETERS = """
+t:
+- filter: self_attn
+  value: [0, 0.5, 0.3, 0.7, 1]
+- filter: mlp
+  value: [1, 0.5, 0.7, 0.3, 0]
+- value: 0.5
+"""
 def create_config_yaml(
     model1,
     model1_layers,
     model2,
     base_model,
     parameters,
     dtype,
+) -> str:
+    dict_config = {
+        "slices": [
+            {
+                "sources": [
+                    {"model": model1, "layer_range": yaml.safe_load(model1_layers)},
+                    {"model": model2, "layer_range": yaml.safe_load(model2_layers)},
+                ]
+            }
+        ],
+        "merge_method": merge_method,
+        "base_model": base_model,
+    }
+    if parameters:
+        dict_config["parameters"] = yaml.safe_load(parameters)
+    if dtype:
+        dict_config["dtype"] = dtype
+    return yaml.dump(dict_config, sort_keys=False)
 # make sure to add the themes as well
+with gr.Blocks() as demo:
+    gr.Markdown(MARKDOWN_DESCRIPTION)
+    with gr.Row():
+        # model_name_input = gr.Textbox(label="Model Name", value="my-merge")
+        model1_input = gr.Textbox(label="Model 1", value="BioMistral/BioMistral-7B")
+        model1_layers_input = gr.Textbox(
+            label="Model 1 Layer Range", placeholder="[start, end]", value="[0, 32]"
         )
+        model2_input = gr.Textbox(
+            label="Model 2", value="CorticalStack/pastiche-crown-clown-7b-dare-dpo"
         )
+        model2_layers_input = gr.Textbox(
+            label="Model 2 Layer Range", placeholder="[start, end]", value="[0, 32]"
         )
+    merge_method_input = gr.Dropdown(
+        label="Merge Method", choices=["slerp", "linear"], value="slerp"
+    )
+    base_model_input = gr.Textbox(label="Base Model", value="BioMistral/BioMistral-7B")
+    parameters_input = gr.Code(
+        language="yaml",
+        label="Merge Parameters",
+        value=DEFAULT_PARAMETERS,
+    )
+    dtype_input = gr.Textbox(label="Dtype", value="bfloat16")
+    create_button = gr.Button("Create config.yaml", variant="primary")
+    output_zone = gr.Code(language="yaml", lines=10)
+    create_button.click(
+        fn=create_config_yaml,
+        inputs=[
+            model1_input,
+            model1_layers_input,
+            model2_input,
+            model2_layers_input,
+            merge_method_input,
+            base_model_input,
+            parameters_input,
+            dtype_input,
+        ],
+        outputs=[output_zone],
+    )
+demo.launch()