bkhmsi commited on
Commit
8730f5f
·
1 Parent(s): 4e82a89

Added more model configs (micro-llama 1B/3B, micro-moe, SmolLM2 variants); fixed prompt type check and colored the routing bar chart

Browse files
app.py CHANGED
@@ -113,7 +113,9 @@ def route_and_plot(model_choice: str, hf_token: str, user_prompt: str, assistant
113
  generation = None
114
 
115
  df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
 
116
  fig = px.bar(df, x="Expert", y="Percent", title="Token Routing by Expert (%)", text="Percent")
 
117
  fig.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
118
  fig.update_layout(yaxis_range=[0, max(100, max(vals) * 1.25)], bargap=0.35)
119
 
 
113
  generation = None
114
 
115
  df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
116
+ colors = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]
117
  fig = px.bar(df, x="Expert", y="Percent", title="Token Routing by Expert (%)", text="Percent")
118
+ fig.update_traces(marker_color=colors)
119
  fig.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
120
  fig.update_layout(yaxis_range=[0, max(100, max(vals) * 1.25)], bargap=0.35)
121
 
configs/micro_llama_1b_dpo.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-llama-1b-dpo
2
+ model: micro-llama-1b-dpo
3
+
4
+ base-model: meta-llama/Llama-3.2-1B
5
+ tokenizer: meta-llama/Llama-3.2-1B-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ use-router: True
10
+ mask-input: True
11
+ max-length: 8192
12
+
13
+ trainable:
14
+ - model
configs/micro_llama_3b.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-llama-3b
2
+ model: micro-llama-3b
3
+
4
+ base-model: meta-llama/Llama-3.2-3B
5
+ tokenizer: meta-llama/Llama-3.2-3B-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ use-router: True
10
+ mask-input: True
11
+ max-length: 8192
12
+
13
+ trainable:
14
+ - model
configs/micro_moe_llama_1b.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-moe-llama-1b
2
+
3
+ model: micro-moe-llama-1b
4
+
5
+ base-model: meta-llama/Llama-3.2-1B
6
+ tokenizer: meta-llama/Llama-3.2-1B-Instruct
7
+ num-experts: 4
8
+ top-k-experts: 1
9
+ jitter-noise: 0
10
+ router-aux-loss-coef: 0.000
11
+ use-load-balancing: False
12
+ use-router: True
13
+ mask-input: True
14
+ max-length: 8192
15
+
16
+ trainable:
17
+ - model
configs/micro_moe_smollm2_135m.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-moe-smollm2-135m
2
+ model: micro-moe-smollm2-135m
3
+
4
+ base-model: HuggingFaceTB/SmolLM2-135M
5
+ tokenizer: HuggingFaceTB/SmolLM2-135M-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ router-aux-loss-coef: 0.000
10
+ use-load-balancing: False
11
+ use-router: True
12
+ mask-input: True
13
+ max-length: 8192
14
+
15
+ trainable:
16
+ - model
configs/micro_moe_smollm2_360m.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-moe-smollm2-360m
2
+ model: micro-moe-smollm2-360m
3
+
4
+ base-model: HuggingFaceTB/SmolLM2-360M
5
+ tokenizer: HuggingFaceTB/SmolLM2-360M-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ router-aux-loss-coef: 0.000
10
+ use-load-balancing: False
11
+ use-router: True
12
+ mask-input: True
13
+ max-length: 8192
14
+
15
+ trainable:
16
+ - model
configs/micro_smollm2_135m.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-smollm2-135m
2
+ model: micro-smollm2-135m
3
+
4
+ base-model: HuggingFaceTB/SmolLM2-135M
5
+ tokenizer: HuggingFaceTB/SmolLM2-135M-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ use-router: True
10
+ mask-input: True
11
+ max-length: 8192
12
+ gradient-checkpointing: False
13
+
14
+ trainable:
15
+ - model
configs/micro_smollm2_360m.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run-title: micro-smollm2-360m
2
+ model: micro-smollm2-360m
3
+
4
+ base-model: HuggingFaceTB/SmolLM2-360M
5
+ tokenizer: HuggingFaceTB/SmolLM2-360M-Instruct
6
+ num-experts: 4
7
+ top-k-experts: 1
8
+ jitter-noise: 0
9
+ use-router: True
10
+ mask-input: True
11
+ max-length: 8192
12
+ gradient-checkpointing: False
13
+
14
+ trainable:
15
+ - model
router_backend.py CHANGED
@@ -32,7 +32,7 @@ def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dic
32
 
33
  if isinstance(prompt, str):
34
  generation, routing_weights = generate_continuation(model, tokenizer, prompt)
35
- elif isinstance(prompt, dict):
36
  generation = None
37
  routing_weights = get_routing_weights(model, tokenizer, [prompt])
38
 
@@ -88,7 +88,7 @@ def aggregate_routing_weights(routing_weights):
88
  def generate_continuation(model,
89
  tokenizer,
90
  prompts,
91
- max_tokens=1024,
92
  use_cache=True,
93
  return_routing_weights=True
94
  ):
 
32
 
33
  if isinstance(prompt, str):
34
  generation, routing_weights = generate_continuation(model, tokenizer, prompt)
35
+ elif isinstance(prompt, list):
36
  generation = None
37
  routing_weights = get_routing_weights(model, tokenizer, [prompt])
38
 
 
88
  def generate_continuation(model,
89
  tokenizer,
90
  prompts,
91
+ max_tokens=128,
92
  use_cache=True,
93
  return_routing_weights=True
94
  ):