added lineplot
Files changed:
- app.py (+47 -23)
- router_backend.py (+28 -11)
app.py
CHANGED
@@ -21,20 +21,6 @@ import plotly.express as px
 import pandas as pd
 from router_backend import get_expert_routing
 
-# ---- Expected backend adapter ------------------------------------------------
-# Implement your real function in router_backend.py with the following signature:
-# def get_expert_routing(model_id: str, prompt: str) -> Union[List[float], Dict[str, float], Tuple[float, float, float, float]]
-# It MUST return 4 values that sum to ~100 (percentages) in the fixed order:
-# ["Language", "Logic", "Social", "World"]
-# or a mapping with those keys.
-# try:
-#     from router_backend import get_expert_routing  # your real backend
-#     BACKEND_AVAILABLE = True
-# except Exception as e:  # keep error for display if needed
-#     BACKEND_AVAILABLE = False
-#     _backend_import_error = e
-
-
 EXPERTS = ["Language", "Logic", "Social", "World"]
 
 DEFAULT_MODELS = [
@@ -83,6 +69,42 @@ def _compose_prompt(user_prompt: str, assistant_prompt: str) -> str:
         return [{"role": "user", "content": user_prompt}, {"role": "assistant", "content": assistant_prompt}]
     return user_prompt
 
+def plot_lines(arrays):
+    names = EXPERTS
+
+    LINE_COLORS = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]
+
+    LINE_COLORS = {
+        name: color for name, color in zip(names, LINE_COLORS)
+    }
+
+    # Build a tidy DataFrame: columns = index, value, series
+    records = []
+
+    for i, array in enumerate(arrays):
+        for name, v in zip(names, array):
+            records.append({"index": i+1, "value": v, "series": name})
+
+    df = pd.DataFrame.from_records(records)
+
+    fig = px.line(
+        df,
+        x="index",
+        y="value",
+        color="series",
+        color_discrete_map=LINE_COLORS,
+        title="",
+        markers=True,
+    )
+
+    fig.update_layout(
+        xaxis_title="Layer Index",
+        yaxis_title="Percentage (%)",
+        legend_title="Layer-wise Expert Routing",
+    )
+
+    return fig
+
 def route_and_plot(
     model_choice: str,
     user_prompt: str,
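For orientation, the new `plot_lines` expects one row of four percentages per layer, in the `EXPERTS` order used by `app.py`. A minimal usage sketch, assuming it runs alongside the definitions above; the numbers are made up:

```python
# Illustrative only: synthetic per-layer routing percentages
# (columns follow EXPERTS = ["Language", "Logic", "Social", "World"]).
synthetic_layer_routing = [
    [40.0, 30.0, 20.0, 10.0],  # layer 1
    [25.0, 35.0, 25.0, 15.0],  # layer 2
    [10.0, 50.0, 20.0, 20.0],  # layer 3
]

fig = plot_lines(synthetic_layer_routing)  # returns a Plotly line figure
fig.show()
```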
@@ -129,16 +151,19 @@ def route_and_plot(
         msg = "Using mock data."
         vals = _mock_routing(model_id, prompt, seed=seed)
         generation = None
+        layer_routing = None
     else:
         try:
-            raw, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
+            raw, layer_routing, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
             vals = _normalize_output(raw)
             msg = "Routed with real backend."
         except Exception as e:
             # fallback to mock on error, but surface message
+            print(f"Backend error: {e}")
             msg = f"Backend error: {e}\nFalling back to mock data."
             vals = _mock_routing(model_id, prompt, seed=seed)
             generation = None
+            layer_routing = None
 
     df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
     colors = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]
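With this change, `route_and_plot` unpacks three values from `get_expert_routing`: the overall routing, the layer-wise routing, and the generated text. A mock stub with the new shape (illustrative only, not the real backend; the function name and example values are hypothetical) could look like:

```python
from typing import Dict, List, Optional, Tuple, Union
import numpy as np

def mock_get_expert_routing(
    model_id: str,
    hf_token: str,
    prompt: Union[str, List[Dict[str, str]]],
    ablations: List[str],
) -> Tuple[Dict[str, float], np.ndarray, Optional[str]]:
    # Overall percentages keyed by expert name, layer-wise percentages with
    # shape (num_layers, num_experts), and an optional generated continuation.
    overall = {"Language": 40.0, "Logic": 30.0, "Social": 20.0, "World": 10.0}
    layer_routing = np.tile([40.0, 30.0, 20.0, 10.0], (16, 1))  # 16 fake layers
    return overall, layer_routing, None
```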
@@ -147,18 +172,19 @@ def route_and_plot(
     fig.update_traces(texttemplate="%{text:.2f}%", textposition="outside")
     fig.update_layout(yaxis_range=[0, max(100, max(vals) * 1.25)], bargap=0.35)
 
+    line_fig = plot_lines(layer_routing) if layer_routing is not None else None
+
     status = f"Model: {model_id}<br>{msg}"
     if generation is None:
         generation = assistant_prompt
 
-    return generation, df, fig, status
+    return generation, df, fig, line_fig, status
 
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     gr.Markdown(
         """
         # 🧠 Mixture of Cognitive Reasoner (MiCRo) Expert Routing Visualizer
         ## Enter a prompt (and optionally an assistant reply), pick a model, and visualize how tokens were routed across experts.
-        Paper: [Mixture of Cognitive Reasoners: Modular Reasoning with Brain-Like Specialization](https://arxiv.org/abs/2506.13331)
         ----
         This demo visualizes how modular language models allocate computation across specialized experts—Language, Logic, Social, and World—when processing a given prompt.
         Each expert corresponds to a cognitive domain inspired by human brain networks. Enter a prompt to see how tokens are dynamically routed across modules, revealing the model's internal reasoning structure.
@@ -187,17 +213,15 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     user_prompt = gr.Textbox(lines=6, label="User prompt", placeholder="Type the user message here...")
     assistant_prompt = gr.Textbox(lines=6, label="Assistant prompt (optional)", placeholder="Type the assistant message here (optional)...")
 
-    # with gr.Row():
-    #     use_mock = gr.Checkbox(value=True, label="Use mock data (uncheck to call your backend)")
-    #     seed = gr.Slider(value=0, minimum=0, maximum=10_000, step=1, label="Mock seed")
-
     run = gr.Button("Run Routing", variant="primary")
 
     generation_output = gr.Textbox(lines=4, label="Generated Response", placeholder="Generated text will appear here...", interactive=False)
 
     with gr.Row():
         table = gr.Dataframe(label="Routing Percentages", interactive=False)
+
         plot = gr.Plot(label="Bar Plot")
+        line_plot = gr.Plot(label="Layer-wise Routing Percentages")
 
 
     status = gr.Markdown("", label="System Message")
@@ -205,7 +229,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     run.click(
         route_and_plot,
         inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
-        outputs=[generation_output, table, plot, status],
+        outputs=[generation_output, table, plot, line_plot, status],
     )
 
     # example prompts
@@ -242,7 +266,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
         label="Try these examples:",
         cache_examples=True,
         fn=route_and_plot,
-        outputs=[generation_output, table, plot, status],
+        outputs=[generation_output, table, plot, line_plot, status],
     )
 
 if __name__ == "__main__":
router_backend.py
CHANGED
@@ -37,9 +37,26 @@ def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dic
     generation = None
     routing_weights = get_routing_weights(model, tokenizer, [prompt])
 
-    model_routing_percentages = aggregate_routing_weights(routing_weights)
+    model_routing_percentages, layer_token_routing = aggregate_routing_weights(routing_weights)
+
+    layer_token_routing = np.array(layer_token_routing)
+    num_experts, num_layers = layer_token_routing.shape
+
     print(model_routing_percentages)
 
+    layer_token_routing = np.roll(layer_token_routing, shift=1, axis=0)
+
+    all_layer_routing_percentages = []
+    for layer_idx in range(num_layers):
+        layer_token_percentages = []
+        for expert_idx in range(num_experts):
+            percentage = (layer_token_routing[expert_idx][layer_idx] / sum(layer_token_routing[:, layer_idx])) * 100
+            layer_token_percentages.append(percentage)
+        all_layer_routing_percentages.append(layer_token_percentages)
+
+    layer_routing_percentages = np.array(all_layer_routing_percentages)
+
+
     if generation is not None:
         print(f"Generation:\n{generation}")
 
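The nested loop added above turns the (num_experts, num_layers) count matrix into per-layer percentages. An equivalent vectorized form, shown only as a sketch and not part of this commit, is:

```python
import numpy as np

# Each column of layer_token_routing sums to the number of tokens routed at
# that layer; dividing by the column sums and transposing reproduces the
# (num_layers, num_experts) array built by the loop.
layer_routing_percentages = (
    layer_token_routing / layer_token_routing.sum(axis=0, keepdims=True) * 100
).T
```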
@@ -48,7 +65,7 @@ def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dic
         "Logic": float(model_routing_percentages[0]),
         "Social": float(model_routing_percentages[1]),
         "World": float(model_routing_percentages[2]),
-    }, generation
+    }, layer_routing_percentages, generation
 
 def get_model_path(model_name: str) -> Tuple[str, str, AutoModelForCausalLM]:
     return {
@@ -75,15 +92,14 @@ def get_model_path(model_name: str) -> Tuple[str, str, AutoModelForCausalLM]:
 def aggregate_routing_weights(routing_weights):
     experts = ["Logic", "Social", "World", "Language"]
     expert_token_model = np.zeros((len(experts)), dtype=int)
-    expert_layer_token = np.zeros((routing_weights.shape[0], len(experts)), dtype=int)
+    expert_layer_token = np.zeros((len(experts), routing_weights.shape[0]), dtype=int)
     num_layers = routing_weights.shape[0]
 
     for layer_idx in range(num_layers):
         for token_idx in range(len(routing_weights[layer_idx])):
             expert_idx = routing_weights[layer_idx][token_idx].argmax()
-
-
-            expert_layer_token[layer_idx][expert_idx] += 1
+            expert_token_model[expert_idx] += 1
+            expert_layer_token[expert_idx][layer_idx] += 1
     return expert_token_model, expert_layer_token
 
 def generate_continuation(model,
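As a quick sanity check of the updated counters (made-up numbers; expert indices follow the function's `experts` list):

```python
import numpy as np

# 2 layers x 3 tokens x 4 experts; argmax per token picks the winning expert.
routing_weights = np.array([
    [[0.7, 0.1, 0.1, 0.1],    # layer 0, token 0 -> expert 0 (Logic)
     [0.1, 0.6, 0.2, 0.1],    # layer 0, token 1 -> expert 1 (Social)
     [0.1, 0.1, 0.1, 0.7]],   # layer 0, token 2 -> expert 3 (Language)
    [[0.2, 0.2, 0.5, 0.1],    # layer 1, token 0 -> expert 2 (World)
     [0.8, 0.1, 0.05, 0.05],  # layer 1, token 1 -> expert 0 (Logic)
     [0.1, 0.2, 0.3, 0.4]],   # layer 1, token 2 -> expert 3 (Language)
])

expert_token_model, expert_layer_token = aggregate_routing_weights(routing_weights)
# expert_token_model -> per-expert token counts over all layers: [2, 1, 1, 2]
# expert_layer_token -> (num_experts, num_layers) counts: [[1, 1], [1, 0], [0, 1], [1, 1]]
```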
@@ -179,7 +195,8 @@ def get_routing_weights(model, tokenizer, prompts, apply_chat_template=True):
     attention_mask = torch.ones_like(inputs)
     attention_mask[inputs == tokenizer.pad_token_id] = 0
 
-    model_output = model(input_ids=inputs, attention_mask=attention_mask)
+    with torch.no_grad():
+        model_output = model(input_ids=inputs, attention_mask=attention_mask)
 
     routing_weights = model_output.routing_weights
     routing_weights = np.stack([F.softmax(rw, dim=-1).detach().float().cpu().numpy() for rw in routing_weights], axis=0).squeeze()
@@ -199,8 +216,8 @@ def build_model(model_id: str, hf_token: str, ablations: List[str], use_cache: b
 
     model_config.config_path = f"{parent_path}/configs/{model_id.replace('-', '_')}.yml"
 
-    model_config.torch_dtype = torch.bfloat16
-    model_config.use_bfloat16 = True
+    model_config.torch_dtype = torch.float16
+    model_config.use_bfloat16 = False
     model_config._attn_implementation = "eager"  # {sdpa, flash_attention_2, eager}
     model_config.use_cache = use_cache
     model_config.ablate = ablations
@@ -221,9 +238,9 @@ def build_model(model_id: str, hf_token: str, ablations: List[str], use_cache: b
     if "olmo" in model_id:
        model_config.vocab_size = len(tokenizer)
 
-    model = model_class.from_pretrained(model_path, config=model_config, low_cpu_mem_usage=True)
+    model = model_class.from_pretrained(model_path, config=model_config, low_cpu_mem_usage=True, dtype=torch.float16)
 
     model.to(DEVICE)
-    model = model.bfloat16()
+    model = model.half()
     model.eval()
     return model, tokenizer