ytaek-oh committed
Commit ce92291 · 1 Parent(s): d3b9fea
data/250206/textonly_decoder_report.csv ADDED
@@ -0,0 +1,9 @@
+ family,model,tag,all_tasks-add_relation_XS,all_tasks-add_relation_S,all_tasks-add_relation_M,all_tasks-add_relation_L,all_tasks-add_attribute_XS,all_tasks-add_attribute_S,all_tasks-add_attribute_M,all_tasks-add_attribute_L,all_tasks-replace_relation_XS,all_tasks-replace_relation_S,all_tasks-replace_relation_M,all_tasks-replace_relation_L,all_tasks-replace_attribute_XS,all_tasks-replace_attribute_S,all_tasks-replace_attribute_M,all_tasks-replace_attribute_L,all_tasks-replace_entity_XS,all_tasks-replace_entity_S,all_tasks-replace_entity_M,all_tasks-replace_entity_L,all_tasks-swap_relations,all_tasks-swap_attributes,overall-add_relation,overall-add_attribute,overall-replace_relation,overall-replace_attribute,overall-replace_entity,overall-swap_relations,overall-swap_attributes,overall-macro_avg,summary-add,summary-replace,summary-swap,summary-avg_ops,summary-relation,summary-attribute,summary-entity,summary-avg_neg_types,neg_start_positions-add_XS,neg_start_positions-add_S,neg_start_positions-add_M,neg_start_positions-add_L,neg_start_positions-replace_XS,neg_start_positions-replace_S,neg_start_positions-replace_M,neg_start_positions-replace_L,neg_start_positions-Avg_XS,neg_start_positions-Avg_S,neg_start_positions-Avg_M,neg_start_positions-Avg_L
+ vqascore,instructblip-flant5-xl,none,56.71641791044776,55.494505494505496,50.0,50.0,50.54945054945055,53.6,50.0,50.0,50.3448275862069,51.42857142857143,50.0,50.0,41.566265060240966,55.14018691588785,50.0,50.0,45.87155963302752,45.774647887323944,50.0,50.0,49.57446808510638,48.17927170868347,53.052730851238316,51.037362637362634,50.443349753694584,49.176612994032205,47.91155188008787,49.57446808510638,48.17927170868347,49.91076398717221,52.04504674430048,49.17717154260489,48.87686989689493,50.0330293946001,51.023516230013094,49.46441578002611,47.91155188008787,49.466494630042355,53.63293422994916,54.54725274725275,50.0,50.0,45.927550759825124,50.78113541059441,50.0,50.0,49.78024249488714,52.66419407892358,50.0,50.0
+ vqascore,clip-flant5-xl,none,26.11940298507463,29.94505494505495,31.34328358208955,22.22222222222222,50.0,50.8,58.333333333333336,51.724137931034484,56.206896551724135,54.64285714285714,58.267716535433074,59.67741935483871,61.445783132530124,55.14018691588785,59.16666666666667,61.53846153846155,62.38532110091743,57.04225352112677,56.074766355140184,64.28571428571428,58.08510638297872,58.96358543417367,27.40749093361034,52.71436781609195,57.19872239621326,59.322774563386545,59.94701381572467,58.08510638297872,58.96358543417367,53.37700876316846,40.06092937485114,58.822836925108156,58.524345908576194,52.4693707361785,47.56377323760078,57.00024260455072,59.94701381572467,54.837009885958715,38.059701492537314,40.37252747252747,44.83830845771144,36.97318007662835,60.01266692839056,55.608432526623915,57.83638318574665,61.83386505967152,49.03618421046394,47.9904799995757,51.337345821729045,49.40352256814994
+ vqascore,llava-v1.5-7b,none,40.298507462686565,40.93406593406594,42.91044776119403,37.03703703703704,54.120879120879124,54.4,59.55882352941177,48.27586206896552,54.48275862068965,49.28571428571428,43.7007874015748,41.935483870967744,51.204819277108435,51.86915887850468,50.83333333333333,42.30769230769231,47.247706422018354,51.056338028169016,51.4018691588785,69.64285714285714,48.29787234042553,52.10084033613445,40.2950145487459,54.088891179814105,47.35118604473662,49.05375094915969,54.83719268798075,48.29787234042553,52.10084033613445,49.432106869571,47.19195286428,50.41404322729235,50.19935633827999,49.268450809950785,45.31469097796935,51.74782748836941,54.83719268798075,50.633237051439835,47.20969329178284,47.667032967032966,51.2346356453029,42.65644955300128,50.978428106605485,50.73707039746265,48.64532996459554,51.29534444050572,49.09406069919416,49.20205168224781,49.939982804949224,46.975896996753505
+ vqascore,sharegpt4v-7b,none,24.626865671641788,33.24175824175824,43.47014925373135,43.05555555555556,44.78021978021978,53.60000000000001,54.41176470588235,63.793103448275865,48.27586206896551,47.142857142857146,51.181102362204726,59.677419354838705,55.12048192771084,51.4018691588785,50.833333333333336,50.0,64.22018348623854,61.971830985915496,51.4018691588785,73.21428571428572,55.31914893617021,53.22128851540616,36.098582180671734,54.1462719835945,51.56931023221652,51.83892110498067,62.70204233632956,55.31914893617021,53.22128851540616,52.127937898481335,45.122427082133115,55.37009122450892,54.27021872578818,51.58757901081007,47.66234711635283,53.06882720132711,62.70204233632956,54.47773888466983,34.70354272593079,43.42087912087912,48.94095697980685,53.42432950191571,55.872175827638294,53.505519095883706,51.138768284805515,60.963901689708145,45.287859276784545,48.46319910838142,50.03986263230618,57.19411559581192
+ visualgptscore,instructblip-flant5-xl,none,26.865671641791046,20.87912087912088,33.3955223880597,50.0,23.626373626373624,24.8,37.254901960784316,50.0,65.51724137931035,70.0,63.38582677165354,50.0,62.65060240963856,61.6822429906542,53.33333333333333,50.0,58.71559633027523,68.30985915492957,60.74766355140187,50.0,77.23404255319149,74.50980392156862,32.785078727242905,33.92031889678948,62.22576703774097,56.91654468340653,59.443279759151665,77.23404255319149,74.50980392156862,56.71926222558452,33.35269881201619,59.52853049343306,75.87192323738006,56.25105084760977,57.41496277272512,55.11555583392154,59.443279759151665,57.32459945526611,25.246022634082337,22.83956043956044,35.325212174422006,50.0,62.29448003974138,66.66403404852792,59.155607885462906,50.0,43.770251336911855,44.751797244044184,47.24041002994245,50.0
+ visualgptscore,clip-flant5-xl,none,25.37313432835821,15.934065934065933,19.029850746268657,20.37037037037037,18.13186813186813,24.8,31.862745098039213,24.137931034482758,67.58620689655173,66.42857142857143,62.20472440944882,87.09677419354838,62.65060240963856,72.89719626168224,61.25,57.692307692307686,60.550458715596335,68.30985915492957,62.616822429906534,85.71428571428571,76.59574468085107,76.47058823529412,20.17685534476579,24.733136066097526,70.8290692320301,63.62252659090712,69.29785650367954,76.59574468085107,76.47058823529412,57.3893966648036,22.454995705431656,67.91648410887224,76.5331664580726,55.63488209079217,55.86722308588232,54.942083630766255,69.29785650367954,60.03572107344271,21.752501230113168,20.367032967032966,25.446297922153935,22.254150702426564,63.5957560072622,69.21187561506107,62.023848946451785,76.83445586671392,42.67412861868769,44.78945429104702,43.73507343430286,49.54430328457025
+ visualgptscore,llava-v1.5-7b,none,10.44776119402985,10.43956043956044,7.835820895522389,5.555555555555555,17.582417582417584,16.8,22.058823529411764,20.689655172413794,73.79310344827587,70.0,74.01574803149606,83.87096774193549,74.69879518072288,71.02803738317756,68.75,65.38461538461539,75.22935779816514,78.16901408450704,71.02803738317756,89.28571428571429,85.1063829787234,83.75350140056022,8.569674521167059,19.282724071060787,75.41995480542685,69.96536198712896,78.428030887891,85.1063829787234,83.75350140056022,60.0750900931369,13.926199296113923,74.6044492268156,84.42994218964182,57.653530237523775,56.36533743510577,57.66719581958332,78.428030887891,64.15352138086003,14.015089388223718,13.619780219780221,14.947322212467077,13.122605363984675,74.57375214238796,73.06568382256152,71.26459513822454,79.5137658040884,44.29442076530584,43.342732021170875,43.1059586753458,46.318185584036534
+ visualgptscore,sharegpt4v-7b,none,5.970149253731343,8.241758241758241,7.835820895522389,9.25925925925926,20.32967032967033,13.600000000000001,20.588235294117645,20.689655172413794,71.72413793103448,77.14285714285715,71.65354330708661,87.09677419354838,76.50602409638554,72.89719626168224,64.58333333333334,65.38461538461539,77.06422018348624,79.5774647887324,79.43925233644859,82.14285714285714,83.40425531914893,82.35294117647058,7.826746912567808,18.80189019905044,76.90432814363166,69.84279226900412,79.55594861288108,83.40425531914893,82.35294117647058,59.812700376107806,13.314318555809123,75.43435634183895,82.87859824780975,57.20909104848594,56.04511012511614,56.999207881508376,79.55594861288108,64.20008887316853,13.149909791700836,10.920879120879121,14.212028094820017,14.974457215836527,75.09812740363542,76.5391727310906,71.89204299228952,78.2080822403403,44.124018597668126,43.73002592598486,43.052035543554766,46.59126972808841
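For quick reference outside the Streamlit page, the added report can be inspected directly with pandas. The sketch below is illustrative only: it assumes it is run from the repository root (the path is the same one pages/report_0206.py reads), and it picks out the summary-* columns from the CSV header above.

# Minimal sketch: load the new text-only decoder report and print its summary columns.
import pandas as pd

textonly_df = pd.read_csv("./data/250206/textonly_decoder_report.csv")

# Identifier columns, mirroring the ["family", "model", "tag"] filtering used in the page code.
id_cols = ["family", "model", "tag"]
summary_cols = [c for c in textonly_df.columns if c.startswith("summary-")]

print(textonly_df[id_cols + summary_cols].round(2).to_string(index=False))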
pages/report_0206.py CHANGED
@@ -96,6 +96,7 @@ def main():
     df_all = pd.concat((df_all, df_decoder_all), axis=0).reset_index(drop=True)
     cols_all = [col for col in df.columns if col not in ["family", "model", "tag"]]
     df[cols_all] = df[cols_all] - df_all[cols_all]
+    textonly_df = pd.read_csv("./data/250206/textonly_decoder_report.csv")

     columns = list(df.columns)
     col_dict = defaultdict(list)
@@ -172,6 +173,16 @@ def main():
         "- InstructBLIP caps the input context length at 128 and the output context length at 256. Feeding the description through the input (VQAScore) exceeds the context length, so evaluation is not possible; feeding it through the output is possible (VisualGPTScore)"
     )
     print_table_overall(df, model_names, metric_group, metric_columns)
+    if selected == "after filtering" and group == "decoder-based":
+        st.write("### decoder-based models (zero-tensor images)")
+        if metric_group == "summary":
+            st.markdown(
+                "- Accuracy drops overall when image information is absent. VQAScore stays near random-chance accuracy (50%), while VisualGPTScore can still outperform on the `replace` and `swap` splits from text input alone."
+            )
+            st.markdown(
+                "- For VisualGPTScore, the output token probability is critical for identifying negative descriptions, even when only a single token changes."
+            )
+        print_table_overall(textonly_df, model_names, metric_group, metric_columns)
     st.markdown("---")
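As context for the unchanged lines in the first hunk: the page turns df into per-column deltas against df_all before display, whereas the new textonly_df table is rendered as raw scores. Below is a minimal standalone sketch of that delta step, using made-up toy rows rather than the app's actual report frames.

import pandas as pd

# Hypothetical toy stand-ins for df and df_all; the real frames come from the report CSVs.
df = pd.DataFrame({"family": ["vqascore"], "model": ["clip-flant5-xl"], "tag": ["none"], "summary-avg_ops": [52.5]})
df_all = pd.DataFrame({"family": ["vqascore"], "model": ["clip-flant5-xl"], "tag": ["none"], "summary-avg_ops": [50.0]})

# Same column-wise subtraction as in pages/report_0206.py: identifier columns are excluded,
# and the remaining metric columns become deltas relative to df_all (here 52.5 - 50.0 = 2.5).
cols_all = [col for col in df.columns if col not in ["family", "model", "tag"]]
df[cols_all] = df[cols_all] - df_all[cols_all]
print(df)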