added more models

- app.py +2 -0
- configs/micro_llama_1b_dpo.yml +14 -0
- configs/micro_llama_3b.yml +14 -0
- configs/micro_moe_llama_1b.yml +17 -0
- configs/micro_moe_smollm2_135m.yml +16 -0
- configs/micro_moe_smollm2_360m.yml +16 -0
- configs/micro_smollm2_135m.yml +15 -0
- configs/micro_smollm2_360m.yml +15 -0
- router_backend.py +2 -2
app.py
CHANGED

@@ -113,7 +113,9 @@ def route_and_plot(model_choice: str, hf_token: str, user_prompt: str, assistant
     generation = None
 
     df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
+    colors = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]
     fig = px.bar(df, x="Expert", y="Percent", title="Token Routing by Expert (%)", text="Percent")
+    fig.update_traces(marker_color=colors)
     fig.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
     fig.update_layout(yaxis_range=[0, max(100, max(vals) * 1.25)], bargap=0.35)
 
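For reference, the change swaps Plotly's default palette for one fixed color per expert. Below is the resulting plotting code as a standalone sketch; EXPERTS and vals are placeholders standing in for the app's real routing data:

import pandas as pd
import plotly.express as px

# Stand-ins for the app's routing data (assumed shapes, not from this repo).
EXPERTS = ["expert-0", "expert-1", "expert-2", "expert-3"]
vals = [41.2, 22.7, 19.6, 16.5]  # percent of tokens routed to each expert

colors = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]  # one fixed color per expert

df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
fig = px.bar(df, x="Expert", y="Percent", title="Token Routing by Expert (%)", text="Percent")
fig.update_traces(marker_color=colors)  # apply the fixed per-expert palette
fig.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
fig.update_layout(yaxis_range=[0, max(100, max(vals) * 1.25)], bargap=0.35)
fig.show()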
configs/micro_llama_1b_dpo.yml
ADDED

@@ -0,0 +1,14 @@
+run-title: micro-llama-1b-dpo
+model: micro-llama-1b-dpo
+
+base-model: meta-llama/Llama-3.2-1B
+tokenizer: meta-llama/Llama-3.2-1B-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+use-router: True
+mask-input: True
+max-length: 8192
+
+trainable:
+  - model
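All the added configs share this small flat schema. A minimal sketch of how such a file might be read (the loader function is illustrative, not from this repo; note the keys use dashes, so they are accessed as dict entries rather than attributes):

import yaml  # pip install pyyaml

def load_run_config(path: str) -> dict:
    # Read one of the configs/*.yml files into a plain dict.
    with open(path) as f:
        return yaml.safe_load(f)

cfg = load_run_config("configs/micro_llama_1b_dpo.yml")
assert cfg["num-experts"] == 4 and cfg["top-k-experts"] == 1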
configs/micro_llama_3b.yml
ADDED

@@ -0,0 +1,14 @@
+run-title: micro-llama-3b
+model: micro-llama-3b
+
+base-model: meta-llama/Llama-3.2-3B
+tokenizer: meta-llama/Llama-3.2-3B-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+use-router: True
+mask-input: True
+max-length: 8192
+
+trainable:
+  - model
configs/micro_moe_llama_1b.yml
ADDED

@@ -0,0 +1,17 @@
+run-title: micro-moe-llama-1b
+
+model: micro-moe-llama-1b
+
+base-model: meta-llama/Llama-3.2-1B
+tokenizer: meta-llama/Llama-3.2-1B-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+router-aux-loss-coef: 0.000
+use-load-balancing: False
+use-router: True
+mask-input: True
+max-length: 8192
+
+trainable:
+  - model
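The micro-moe configs add two router keys the dense micro configs lack: router-aux-loss-coef and use-load-balancing (both disabled here). Assuming these control the usual Switch-Transformer-style load-balancing auxiliary loss, this sketch shows what such a term computes; with the coefficient at 0.000 it would contribute nothing to training:

import torch

def load_balancing_loss(router_probs: torch.Tensor, expert_index: torch.Tensor, num_experts: int) -> torch.Tensor:
    # Switch-style aux loss: num_experts * sum_e f_e * P_e, where f_e is the
    # fraction of tokens dispatched to expert e and P_e the mean router
    # probability for e. It is minimized when routing is uniform.
    # router_probs: (tokens, num_experts) softmax outputs
    # expert_index: (tokens,) top-1 expert choice per token
    dispatch = torch.nn.functional.one_hot(expert_index, num_experts).float()
    tokens_per_expert = dispatch.mean(dim=0)   # f_e
    prob_per_expert = router_probs.mean(dim=0) # P_e
    return num_experts * torch.sum(tokens_per_expert * prob_per_expert)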
configs/micro_moe_smollm2_135m.yml
ADDED

@@ -0,0 +1,16 @@
+run-title: micro-moe-smollm2-135m
+model: micro-moe-smollm2-135m
+
+base-model: HuggingFaceTB/SmolLM2-135M
+tokenizer: HuggingFaceTB/SmolLM2-135M-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+router-aux-loss-coef: 0.000
+use-load-balancing: False
+use-router: True
+mask-input: True
+max-length: 8192
+
+trainable:
+  - model
configs/micro_moe_smollm2_360m.yml
ADDED

@@ -0,0 +1,16 @@
+run-title: micro-moe-smollm2-360m
+model: micro-moe-smollm2-360m
+
+base-model: HuggingFaceTB/SmolLM2-360M
+tokenizer: HuggingFaceTB/SmolLM2-360M-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+router-aux-loss-coef: 0.000
+use-load-balancing: False
+use-router: True
+mask-input: True
+max-length: 8192
+
+trainable:
+  - model
configs/micro_smollm2_135m.yml
ADDED

@@ -0,0 +1,15 @@
+run-title: micro-smollm2-135m
+model: micro-smollm2-135m
+
+base-model: HuggingFaceTB/SmolLM2-135M
+tokenizer: HuggingFaceTB/SmolLM2-135M-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+use-router: True
+mask-input: True
+max-length: 8192
+gradient-checkpointing: False
+
+trainable:
+  - model
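The dense SmolLM2 configs additionally expose gradient-checkpointing (off here). Assuming the flag maps onto the standard transformers toggle, a sketch of how a trainer might honor it (cfg is the dict from the YAML loader sketched earlier):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")
if cfg.get("gradient-checkpointing", False):
    # Recompute activations in the backward pass: slower steps,
    # but much lower activation memory at max-length 8192.
    model.gradient_checkpointing_enable()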
configs/micro_smollm2_360m.yml
ADDED

@@ -0,0 +1,15 @@
+run-title: micro-smollm2-360m
+model: micro-smollm2-360m
+
+base-model: HuggingFaceTB/SmolLM2-360M
+tokenizer: HuggingFaceTB/SmolLM2-360M-Instruct
+num-experts: 4
+top-k-experts: 1
+jitter-noise: 0
+use-router: True
+mask-input: True
+max-length: 8192
+gradient-checkpointing: False
+
+trainable:
+  - model
router_backend.py
CHANGED

@@ -32,7 +32,7 @@ def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dic
 
     if isinstance(prompt, str):
         generation, routing_weights = generate_continuation(model, tokenizer, prompt)
-    elif isinstance(prompt,
+    elif isinstance(prompt, list):
         generation = None
         routing_weights = get_routing_weights(model, tokenizer, [prompt])
 

@@ -88,7 +88,7 @@ def aggregate_routing_weights(routing_weights):
 def generate_continuation(model,
                           tokenizer,
                           prompts,
-                          max_tokens=
+                          max_tokens=128,
                           use_cache=True,
                           return_routing_weights=True
                           ):
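For orientation, the two changed call paths as a hedged usage sketch. The function names, the max_tokens=128 default, and the [prompt] wrapping come from the diff; model/tokenizer loading and the message format are assumptions:

# String prompt: generate a continuation while tracking expert routing.
generation, routing_weights = generate_continuation(
    model, tokenizer, "Why is the sky blue?",
    max_tokens=128,  # new default introduced by this commit
    use_cache=True,
    return_routing_weights=True,
)

# Chat-style prompt (a list of messages): no generation, just routing
# weights from a forward pass, matching the new isinstance(prompt, list) branch.
messages = [{"role": "user", "content": "Why is the sky blue?"}]
routing_weights = get_routing_weights(model, tokenizer, [messages])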