Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
36438b0
1
Parent(s):
29701ab
debug
Browse files
- src/about.py +15 -4
- src/display/utils.py +3 -4
- src/populate.py +7 -6
src/about.py
CHANGED
|
@@ -54,20 +54,31 @@ class TaskMIB_Causalgraph:
|
|
| 54 |
counterfactuals: list[str] # symbol_counterfactual, etc.
|
| 55 |
metrics: list[str] # score
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
class TasksMib_Causalgraph(Enum):
|
| 58 |
task0 = TaskMIB_Causalgraph(
|
| 59 |
"MCQA",
|
| 60 |
-
["
|
| 61 |
[str(i) for i in range(32)], # 0-31 layers
|
| 62 |
"mcqa",
|
| 63 |
["output_token", "output_location"],
|
| 64 |
-
["
|
| 65 |
-
"
|
| 66 |
["score"]
|
| 67 |
)
|
| 68 |
|
| 69 |
|
| 70 |
-
|
| 71 |
NUM_FEWSHOT = 0 # Change with your few shot
|
| 72 |
# ---------------------------------------------------
|
| 73 |
|
|
|
|
| 54 |
counterfactuals: list[str] # symbol_counterfactual, etc.
|
| 55 |
metrics: list[str] # score
|
| 56 |
|
| 57 |
+
# class TasksMib_Causalgraph(Enum):
|
| 58 |
+
# task0 = TaskMIB_Causalgraph(
|
| 59 |
+
# "MCQA",
|
| 60 |
+
# ["LlamaForCausalLM", "Qwen2ForCausalLM", "Gemma2ForCausalLM"], # Updated model list
|
| 61 |
+
# [str(i) for i in range(32)], # 0-31 layers
|
| 62 |
+
# "mcqa",
|
| 63 |
+
# ["output_token", "output_location"],
|
| 64 |
+
# ["symbol_counterfactual", "randomLetter_counterfactual",
|
| 65 |
+
# "answerPosition_counterfactual", "answerPosition_symbol_counterfactual"],
|
| 66 |
+
# ["score"]
|
| 67 |
+
# )
|
| 68 |
+
|
| 69 |
class TasksMib_Causalgraph(Enum):
|
| 70 |
task0 = TaskMIB_Causalgraph(
|
| 71 |
"MCQA",
|
| 72 |
+
["qwen2forcausallm", "gemma2forcausallm", "llamaforcausallm"], # Match exact model names
|
| 73 |
[str(i) for i in range(32)], # 0-31 layers
|
| 74 |
"mcqa",
|
| 75 |
["output_token", "output_location"],
|
| 76 |
+
["randomLetter_counterfactual", "answerPosition_counterfactual",
|
| 77 |
+
"answerPosition_randomLetter_counterfactual"], # Match exact counterfactual names
|
| 78 |
["score"]
|
| 79 |
)
|
| 80 |
|
| 81 |
|
|
|
|
| 82 |
NUM_FEWSHOT = 0 # Change with your few shot
|
| 83 |
# ---------------------------------------------------
|
| 84 |
|
src/display/utils.py
CHANGED
|
@@ -171,12 +171,12 @@ auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnCon
|
|
| 171 |
|
| 172 |
# For each model-task-intervention-counterfactual combination
|
| 173 |
for task in TasksMib_Causalgraph:
|
| 174 |
-
for model in task.value.models:
|
| 175 |
-
model_name = model # Don't convert to lowercase
|
| 176 |
for layer in task.value.layers:
|
| 177 |
for intervention in task.value.interventions:
|
| 178 |
for counterfactual in task.value.counterfactuals:
|
| 179 |
-
|
|
|
|
| 180 |
auto_eval_column_dict_mib_causalgraph.append([
|
| 181 |
col_name,
|
| 182 |
ColumnContent,
|
|
@@ -184,7 +184,6 @@ for task in TasksMib_Causalgraph:
|
|
| 184 |
])
|
| 185 |
|
| 186 |
|
| 187 |
-
|
| 188 |
|
| 189 |
|
| 190 |
# Create the dataclass
|
|
|
|
| 171 |
|
| 172 |
# For each model-task-intervention-counterfactual combination
|
| 173 |
for task in TasksMib_Causalgraph:
|
| 174 |
+
for model in task.value.models:
|
|
|
|
| 175 |
for layer in task.value.layers:
|
| 176 |
for intervention in task.value.interventions:
|
| 177 |
for counterfactual in task.value.counterfactuals:
|
| 178 |
+
# Match exact column format from DataFrame
|
| 179 |
+
col_name = f"{model}_layer{layer}_{intervention}_{counterfactual}"
|
| 180 |
auto_eval_column_dict_mib_causalgraph.append([
|
| 181 |
col_name,
|
| 182 |
ColumnContent,
|
|
|
|
| 184 |
])
|
| 185 |
|
| 186 |
|
|
|
|
| 187 |
|
| 188 |
|
| 189 |
# Create the dataclass
|
src/populate.py
CHANGED
|
@@ -180,16 +180,17 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 180 |
if 'eval_name' in df_copy.columns:
|
| 181 |
df_copy = df_copy.drop('eval_name', axis=1)
|
| 182 |
|
| 183 |
-
# Group columns by model and
|
| 184 |
result_cols = {}
|
| 185 |
for task in TasksMib_Causalgraph:
|
| 186 |
for model in task.value.models: # Will iterate over all three models
|
| 187 |
-
model = model.lower()
|
| 188 |
for intervention in task.value.interventions:
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
| 193 |
|
| 194 |
averaged_df = pd.DataFrame()
|
| 195 |
if method_col is not None:
|
|
|
|
| 180 |
if 'eval_name' in df_copy.columns:
|
| 181 |
df_copy = df_copy.drop('eval_name', axis=1)
|
| 182 |
|
| 183 |
+
# Group columns by model and intervention
|
| 184 |
result_cols = {}
|
| 185 |
for task in TasksMib_Causalgraph:
|
| 186 |
for model in task.value.models: # Will iterate over all three models
|
|
|
|
| 187 |
for intervention in task.value.interventions:
|
| 188 |
+
for counterfactual in task.value.counterfactuals:
|
| 189 |
+
col_pattern = f"{model}_layer.*_{intervention}_{counterfactual}"
|
| 190 |
+
matching_cols = [c for c in df_copy.columns if pd.Series(c).str.match(col_pattern).any()]
|
| 191 |
+
if matching_cols:
|
| 192 |
+
col_name = f"{model}_{intervention}_{counterfactual}"
|
| 193 |
+
result_cols[col_name] = matching_cols
|
| 194 |
|
| 195 |
averaged_df = pd.DataFrame()
|
| 196 |
if method_col is not None:
|