ytaek-oh committed on
Commit
6b607a9
·
1 Parent(s): c38d116

add relation, attribute sentences

Browse files
data/250124/decoder_overall.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
2
+ vqascore,instructblip-flant5-xl,none,61.111111111111114,60.902255639097746,50.0,50.0,54.0268456375839,48.4375,50.0,50.0,53.05971404847409
3
+ vqascore,clip-flant5-xl,none,50.55555555555556,49.81203007518798,45.52238805970149,34.92647058823529,57.718120805369125,53.38541666666667,58.33333333333333,54.44444444444444,50.58721994106173
4
+ vqascore,llava-v1.5-7b,none,62.77777777777777,50.18796992481202,50.29850746268657,56.98529411764706,59.060402684563755,54.42708333333333,57.608695652173914,53.33333333333333,55.58488303579097
5
+ vqascore,sharegpt4v-7b,none,53.333333333333336,51.691729323308266,60.44776119402985,68.75,59.22818791946309,49.73958333333333,59.42028985507247,63.33333333333333,58.2430272864842
6
+ visualgptscore,instructblip-flant5-xl,none,30.0,24.06015037593985,34.62686567164179,50.0,24.832214765100673,27.083333333333332,38.22463768115942,50.0,34.853400228396886
7
+ visualgptscore,clip-flant5-xl,none,34.44444444444444,23.684210526315788,22.388059701492537,22.794117647058822,25.503355704697988,29.166666666666668,40.57971014492754,33.33333333333333,28.986737271117136
8
+ visualgptscore,llava-v1.5-7b,none,20.0,16.541353383458645,11.641791044776118,10.294117647058822,23.48993288590604,25.0,36.231884057971016,28.888888888888886,21.510995988507442
9
+ visualgptscore,sharegpt4v-7b,none,21.11111111111111,18.421052631578945,14.328358208955224,11.76470588235294,22.818791946308725,26.041666666666668,35.14492753623188,33.33333333333333,22.870493414567356
data/250124/decoder_summary.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ "('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
2
+ vqascore,instructblip-flant5-xl,none,57.56897837434751,54.66987781954887,50.0,50.0,55.503341687552215,50.616086409395976,53.05971404847409
3
+ vqascore,clip-flant5-xl,none,54.13683818046234,51.598723370927324,51.92786069651741,44.685457516339866,45.20411106967008,55.97032881245339,50.58721994106173
4
+ vqascore,llava-v1.5-7b,none,60.91909023117076,52.307526629072676,53.95360155743025,55.15931372549019,55.06238732073086,56.10737875085108,55.58488303579097
5
+ vqascore,sharegpt4v-7b,none,56.28076062639821,50.7156563283208,59.93402552455116,66.04166666666666,58.555705962667865,57.930348610300555,58.2430272864842
6
+ visualgptscore,instructblip-flant5-xl,none,27.416107382550337,25.571741854636592,36.42575167640061,50.0,34.67175401189541,35.035046444898356,34.853400228396886
7
+ visualgptscore,clip-flant5-xl,none,29.973900074571215,26.425438596491226,31.483884923210038,28.063725490196077,25.827708079827897,32.145766462406385,28.986737271117136
8
+ visualgptscore,llava-v1.5-7b,none,21.74496644295302,20.77067669172932,23.936837551373568,19.591503267973856,14.619315518823395,28.402676458191486,21.510995988507442
9
+ visualgptscore,sharegpt4v-7b,none,21.964951528709918,22.23135964912281,24.736642872593553,22.549019607843135,16.406306958499556,29.334679870635153,22.870493414567356
data/250124/overall.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
2
+ ALIGN,align-base,coyo700m,56.66666666666667,50.0,50.0,50.0,65.1006711409396,50.0,50.0,50.0,52.720917225950785
3
+ CLIPS,CLIPS-Large-14-224,recap-datacomp1b,61.666666666666664,50.0,50.0,50.0,62.58389261744966,50.520833333333336,50.0,50.0,53.096424077181204
4
+ CLIPS,CLIPS-Large-14-336,recap-datacomp1b,58.33333333333333,49.62406015037594,50.0,50.0,60.57046979865772,50.520833333333336,50.0,50.0,52.38108707696254
5
+ CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,52.77777777777777,49.62406015037594,50.0,50.0,67.28187919463087,49.47916666666667,50.0,50.0,52.395360473681414
6
+ DreamLIP,dreamlip-vitb16,cc3m-long,58.888888888888886,50.0,50.0,50.0,59.22818791946309,50.0,50.0,50.0,52.264634601043994
7
+ DreamLIP,dreamlip-vitb16,cc12m-long,54.44444444444444,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,51.751025354213276
8
+ DreamLIP,dreamlip-vitb16,yfcc15m-long,53.33333333333333,50.0,50.0,50.0,61.57718120805369,50.0,50.0,50.0,51.86381431767337
9
+ DreamLIP,dreamlip-vitb16,cc30m-long,57.77777777777778,50.0,50.0,50.0,68.28859060402684,50.0,50.0,50.0,53.25829604772558
10
+ FLAIR,flair-vitb16,cc3m-recap,63.333333333333336,50.0,50.0,50.0,65.93959731543623,50.0,50.0,50.0,53.6591163310962
11
+ FLAIR,flair-vitb16,cc12m-recap,57.77777777777778,50.0,50.0,50.0,66.2751677852349,50.0,50.0,50.0,53.00661819537658
12
+ FLAIR,flair-vitb16,yfcc15m-recap,62.22222222222222,50.0,50.0,50.0,63.255033557046985,50.0,50.0,50.0,53.18465697240865
13
+ FLAIR,flair-vitb16,cc30m-recap,57.77777777777778,50.0,50.0,50.0,66.94630872483222,50.0,50.0,50.0,53.09051081282625
14
+ FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,61.111111111111114,50.0,50.0,50.0,61.241610738255034,50.0,50.0,50.0,52.79409023117077
15
+ FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,58.888888888888886,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,52.306580909768826
16
+ FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,65.55555555555556,50.0,50.0,50.0,58.557046979865774,50.0,50.0,50.0,53.014075316927666
17
+ Jina-CLIP,jina-clip-v1,jinaai,61.111111111111114,55.26315789473685,53.73134328358209,52.94117647058824,66.44295302013423,63.541666666666664,60.14492753623188,66.66666666666666,59.98037533121472
18
+ Jina-CLIP,jina-clip-v2,jinaai,62.77777777777778,63.34586466165413,51.64179104477612,52.5735294117647,67.11409395973155,64.84375,61.594202898550726,55.55555555555556,59.93082066372632
19
+ LoTLIP,LoTLIP-ViT-B-32,lotlip100m,66.66666666666666,49.81203007518797,50.0,50.0,66.77852348993288,57.8125,50.18115942028985,50.0,55.156359956509675
20
+ LoTLIP,LoTLIP-ViT-B-16,lotlip100m,52.22222222222223,49.81203007518797,50.0,50.0,66.10738255033557,51.5625,49.81884057971014,50.0,52.440371928431986
21
+ LongCLIP,longclip-vitb32,sharegpt4v-1m,48.33333333333333,52.819548872180455,48.95522388059702,50.0,54.36241610738255,54.6875,61.05072463768116,50.0,52.52609335389681
22
+ LongCLIP,longclip-vitb16,sharegpt4v-1m,51.111111111111114,52.067669172932334,46.865671641791046,50.0,56.04026845637584,58.333333333333336,60.14492753623188,50.0,53.07037265647194
23
+ LongCLIP,longclip-vitl14,sharegpt4v-1m,62.22222222222223,62.78195488721804,55.07462686567163,50.0,56.87919463087249,58.59375,57.06521739130435,50.0,56.57712074966109
24
+ LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,62.22222222222222,61.278195488721806,53.43283582089552,50.0,53.52348993288591,57.55208333333333,54.347826086956516,50.0,55.294581610626906
25
+ OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,70.0,50.0,50.0,50.0,62.75167785234899,50.0,50.0,50.0,54.09395973154362
26
+ OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,67.22222222222223,50.0,50.0,50.0,63.08724832214765,50.0,50.0,50.0,53.78868381804624
27
+ OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,59.44444444444444,50.0,50.0,50.0,61.40939597315436,50.0,50.0,50.0,52.60673005219985
28
+ OpenCLIP,ViT-H-14,laion2b_s32b_b79k,67.77777777777777,50.0,50.0,50.0,65.60402684563758,50.0,50.0,50.0,54.17272557792692
29
+ OpenCLIP,ViT-L-14,laion2b_s32b_b82k,61.111111111111114,50.0,50.0,50.0,60.90604026845638,50.0,50.0,50.0,52.75214392244594
30
+ OpenCLIP,ViT-B-32,laion2b_s34b_b79k,58.888888888888886,50.0,50.0,50.0,63.59060402684564,50.0,50.0,50.0,52.809936614466814
31
+ OpenCLIP,ViT-B-16,laion2b_s34b_b88k,63.333333333333336,50.0,50.0,50.0,58.89261744966443,50.0,50.0,50.0,52.77824384787472
32
+ OpenCLIP,ViT-g-14,laion2b_s34b_b88k,61.111111111111114,50.0,50.0,50.0,60.90604026845638,50.0,50.0,50.0,52.75214392244594
33
+ OpenCLIP,ViT-B-16,openai,61.111111111111114,50.0,50.0,50.0,59.22818791946309,50.0,50.0,50.0,52.54241237882177
34
+ OpenCLIP,ViT-B-32,openai,62.22222222222222,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,52.7232475764355
35
+ OpenCLIP,ViT-L-14,openai,58.888888888888886,50.0,50.0,50.0,58.22147651006712,50.0,50.0,50.0,52.1387956748695
36
+ OpenCLIP,ViT-L-14-336,openai,61.111111111111114,50.0,50.0,50.0,60.570469798657726,50.0,50.0,50.0,52.71019761372111
37
+ OpenCLIP,ViT-B-16-SigLIP,webli,50.0,50.0,50.0,50.0,60.738255033557046,50.0,50.0,50.0,51.34228187919463
38
+ OpenCLIP,ViT-B-16-SigLIP-384,webli,50.0,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.3003355704698
39
+ OpenCLIP,ViT-L-16-SigLIP-256,webli,54.44444444444444,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.855891126025355
40
+ OpenCLIP,ViT-L-16-SigLIP-384,webli,52.222222222222214,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.57811334824758
41
+ OpenCLIP,ViT-SO400M-14-SigLIP,webli,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
42
+ Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,60.0,52.819548872180455,50.0,50.0,63.758389261744966,57.8125,50.18115942028985,50.0,54.321449694276914
43
+ StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,71.11111111111111,50.0,50.0,50.0,58.22147651006712,50.0,50.0,50.0,53.66657345264728
44
+ StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,68.88888888888889,50.0,50.0,50.0,61.241610738255034,50.0,50.0,50.0,53.76631245339299
45
+ StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,68.88888888888889,50.0,50.0,50.0,60.570469798657726,50.0,50.0,50.0,53.68241983594333
46
+ StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,63.333333333333336,50.0,50.0,50.0,54.194630872483216,50.0,50.0,50.0,52.19099552572707
data/250124/summary.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
2
+ ALIGN,align-base,coyo700m,60.883668903803134,50.0,50.0,50.0,51.66666666666667,53.7751677852349,52.720917225950785
3
+ CLIPS,CLIPS-Large-14-224,recap-datacomp1b,62.12527964205816,50.26041666666667,50.0,50.0,52.916666666666664,53.27618148769575,53.096424077181204
4
+ CLIPS,CLIPS-Large-14-336,recap-datacomp1b,59.45190156599553,50.072446741854634,50.0,50.0,51.98934837092732,52.772825782997764,52.38108707696254
5
+ CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,60.02982848620432,49.551613408521305,50.0,50.0,50.60045948203843,54.19026146532438,52.395360473681414
6
+ DreamLIP,dreamlip-vitb16,cc3m-long,59.05853840417599,50.0,50.0,50.0,52.22222222222222,52.307046979865774,52.264634601043994
7
+ DreamLIP,dreamlip-vitb16,cc12m-long,57.004101416853096,50.0,50.0,50.0,51.111111111111114,52.39093959731544,51.751025354213276
8
+ DreamLIP,dreamlip-vitb16,yfcc15m-long,57.45525727069351,50.0,50.0,50.0,50.83333333333333,52.89429530201342,51.86381431767337
9
+ DreamLIP,dreamlip-vitb16,cc30m-long,63.033184190902304,50.0,50.0,50.0,51.94444444444444,54.57214765100671,53.25829604772558
10
+ FLAIR,flair-vitb16,cc3m-recap,64.63646532438479,50.0,50.0,50.0,53.333333333333336,53.98489932885906,53.6591163310962
11
+ FLAIR,flair-vitb16,cc12m-recap,62.02647278150634,50.0,50.0,50.0,51.94444444444444,54.06879194630872,53.00661819537658
12
+ FLAIR,flair-vitb16,yfcc15m-recap,62.7386278896346,50.0,50.0,50.0,53.05555555555556,53.31375838926175,53.18465697240865
13
+ FLAIR,flair-vitb16,cc30m-recap,62.362043251304996,50.0,50.0,50.0,51.94444444444444,54.236577181208055,53.09051081282625
14
+ FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,61.176360924683074,50.0,50.0,50.0,52.77777777777778,52.810402684563755,52.79409023117077
15
+ FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,59.22632363907532,50.0,50.0,50.0,52.22222222222222,52.39093959731544,52.306580909768826
16
+ FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,62.056301267710666,50.0,50.0,50.0,53.888888888888886,52.13926174496645,53.014075316927666
17
+ Jina-CLIP,jina-clip-v1,jinaai,63.77703206562267,59.40241228070175,56.93813540990699,59.803921568627445,55.76169719000457,64.19905347242485,59.98037533121472
18
+ Jina-CLIP,jina-clip-v2,jinaai,64.94593586875466,64.09480733082707,56.61799697166342,54.064542483660134,57.58474072399318,62.27690060345945,59.93082066372632
19
+ LoTLIP,LoTLIP-ViT-B-32,lotlip100m,66.72259507829978,53.81226503759399,50.09057971014492,50.0,54.11967418546366,56.193045727555685,55.156359956509675
20
+ LoTLIP,LoTLIP-ViT-B-16,lotlip100m,59.1648023862789,50.68726503759399,49.90942028985507,50.0,50.50856307435255,54.37218078251143,52.440371928431986
21
+ LongCLIP,longclip-vitb32,sharegpt4v-1m,51.347874720357936,53.75352443609023,55.00297425913909,50.0,50.0270265215277,55.02516018626593,52.52609335389681
22
+ LongCLIP,longclip-vitb16,sharegpt4v-1m,53.57568978374348,55.200501253132835,53.505299589011464,50.0,50.01111298145862,56.12963233148526,53.07037265647194
23
+ LongCLIP,longclip-vitl14,sharegpt4v-1m,59.55070842654736,60.68785244360902,56.06992212848799,50.0,57.519700993777974,55.63454050554421,56.57712074966109
24
+ LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,57.87285607755406,59.41513941102757,53.89033095392602,50.0,56.733313382959885,53.855849838293935,55.294581610626906
25
+ OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,66.3758389261745,50.0,50.0,50.0,55.0,53.18791946308725,54.09395973154362
26
+ OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,65.15473527218494,50.0,50.0,50.0,54.30555555555556,53.27181208053691,53.78868381804624
27
+ OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,60.4269202087994,50.0,50.0,50.0,52.361111111111114,52.85234899328859,52.60673005219985
28
+ OpenCLIP,ViT-H-14,laion2b_s32b_b79k,66.69090231170767,50.0,50.0,50.0,54.44444444444444,53.901006711409394,54.17272557792692
29
+ OpenCLIP,ViT-L-14,laion2b_s32b_b82k,61.00857568978375,50.0,50.0,50.0,52.77777777777778,52.72651006711409,52.75214392244594
30
+ OpenCLIP,ViT-B-32,laion2b_s34b_b79k,61.23974645786726,50.0,50.0,50.0,52.22222222222222,53.39765100671141,52.809936614466814
31
+ OpenCLIP,ViT-B-16,laion2b_s34b_b88k,61.11297539149888,50.0,50.0,50.0,53.333333333333336,52.22315436241611,52.77824384787472
32
+ OpenCLIP,ViT-g-14,laion2b_s34b_b88k,61.00857568978375,50.0,50.0,50.0,52.77777777777778,52.72651006711409,52.75214392244594
33
+ OpenCLIP,ViT-B-16,openai,60.169649515287105,50.0,50.0,50.0,52.77777777777778,52.307046979865774,52.54241237882177
34
+ OpenCLIP,ViT-B-32,openai,60.89299030574199,50.0,50.0,50.0,53.05555555555556,52.39093959731544,52.7232475764355
35
+ OpenCLIP,ViT-L-14,openai,58.555182699478,50.0,50.0,50.0,52.22222222222222,52.055369127516784,52.1387956748695
36
+ OpenCLIP,ViT-L-14-336,openai,60.84079045488442,50.0,50.0,50.0,52.77777777777778,52.64261744966443,52.71019761372111
37
+ OpenCLIP,ViT-B-16-SigLIP,webli,55.36912751677852,50.0,50.0,50.0,50.0,52.68456375838926,51.34228187919463
38
+ OpenCLIP,ViT-B-16-SigLIP-384,webli,55.20134228187919,50.0,50.0,50.0,50.0,52.600671140939596,51.3003355704698
39
+ OpenCLIP,ViT-L-16-SigLIP-256,webli,57.42356450410142,50.0,50.0,50.0,51.111111111111114,52.600671140939596,51.855891126025355
40
+ OpenCLIP,ViT-L-16-SigLIP-384,webli,56.31245339299031,50.0,50.0,50.0,50.55555555555556,52.600671140939596,51.57811334824758
41
+ OpenCLIP,ViT-SO400M-14-SigLIP,webli,50.0,50.0,50.0,50.0,50.0,50.0,50.0
42
+ Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,61.87919463087248,55.31602443609023,50.09057971014492,50.0,53.204887218045116,55.438012170508706,54.321449694276914
43
+ StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,64.66629381058911,50.0,50.0,50.0,55.27777777777778,52.055369127516784,53.66657345264728
44
+ StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,65.06524981357197,50.0,50.0,50.0,54.72222222222222,52.810402684563755,53.76631245339299
45
+ StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,64.72967934377331,50.0,50.0,50.0,54.72222222222222,52.64261744966443,53.68241983594333
46
+ StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,58.76398210290827,50.0,50.0,50.0,53.333333333333336,51.04865771812081,52.19099552572707
pages/overall_acc_250124.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ import streamlit as st
4
+
5
+ st.set_page_config(layout="wide")
6
# Row keys have the form "family:model:tag" and are matched against the
# family/model/tag columns of the CSV files. List order is the display order.
# NOTE: pages/summary_acc_250124.py defines the same lists; keep them in sync.
SHORT_CAPTIONS = [
    "ALIGN:align-base:coyo700m",
    "OpenCLIP:ViT-B-32:openai",
    "OpenCLIP:ViT-B-16:openai",
    "OpenCLIP:ViT-L-14:openai",
    "OpenCLIP:ViT-L-14-336:openai",
    "OpenCLIP:ViT-B-32:laion2b_s34b_b79k",
    "OpenCLIP:ViT-B-16:laion2b_s34b_b88k",
    "OpenCLIP:ViT-L-14:laion2b_s32b_b82k",
    "OpenCLIP:ViT-g-14:laion2b_s34b_b88k",
    "OpenCLIP:ViT-H-14:laion2b_s32b_b79k",
    "OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k",
    "OpenCLIP:ViT-B-16-SigLIP:webli",
    "OpenCLIP:ViT-B-16-SigLIP-384:webli",
    "OpenCLIP:ViT-L-16-SigLIP-256:webli",
    "OpenCLIP:ViT-L-16-SigLIP-384:webli",
    "OpenCLIP:ViT-SO400M-14-SigLIP:webli",
    "OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k",
    "OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k",
]

LONG_CAPTIONS = [
    "DreamLIP:dreamlip-vitb16:cc3m-long",
    "DreamLIP:dreamlip-vitb16:cc12m-long",
    "DreamLIP:dreamlip-vitb16:yfcc15m-long",
    "DreamLIP:dreamlip-vitb16:cc30m-long",
    "FLAIR:flair-vitb16:cc3m-recap",
    "FLAIR:flair-vitb16:cc12m-recap",
    "FLAIR:flair-vitb16:yfcc15m-recap",
    "FLAIR:flair-vitb16:cc30m-recap",
    "CLIPS:CLIPS-Large-14-224:recap-datacomp1b",
    "CLIPS:CLIPS-Large-14-336:recap-datacomp1b",
    "CLIPS:CLIPS-Huge-14-224:recap-datacomp1b",
    "LoTLIP:LoTLIP-ViT-B-32:lotlip100m",
    "LoTLIP:LoTLIP-ViT-B-16:lotlip100m",
    "Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b",
    "LongCLIP:longclip-vitb32:sharegpt4v-1m",
    "LongCLIP:longclip-vitb16:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14_336px:sharegpt4v-1m",
    "Jina-CLIP:jina-clip-v1:jinaai",
    "Jina-CLIP:jina-clip-v2:jinaai",
]

COMPOSITIONALITY = [
    "OpenCLIP:ViT-B-32:openai",
    "StructuredCLIP:NegCLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft",
    "StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft",
    "FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft",
]

# Rows of the decoder CSV; the tag column is the literal string "none".
DECODERS = [
    "vqascore:instructblip-flant5-xl:none",
    "vqascore:clip-flant5-xl:none",
    "vqascore:llava-v1.5-7b:none",
    "vqascore:sharegpt4v-7b:none",
    "visualgptscore:instructblip-flant5-xl:none",
    "visualgptscore:clip-flant5-xl:none",
    "visualgptscore:llava-v1.5-7b:none",
    "visualgptscore:sharegpt4v-7b:none",
]

# Section name -> row keys; sections are rendered in insertion order.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
48
+
49
+
50
+ def format_df(df):
51
+ cols = []
52
+ for col in df.columns:
53
+ if col in ["family", "model", "tag"]:
54
+ continue
55
+ cols.append(col)
56
+ formatted_df = df.style.format({col: "{:.1f}" for col in cols})
57
+ return formatted_df
58
+
59
+
60
def print_table_overall(df, model_names):
    """Show the rows of ``df`` listed in ``model_names`` as a Streamlit table.

    A row's key is its ``family``, ``model`` and ``tag`` values joined with
    ":". Rows are emitted in the order their keys appear in ``model_names``;
    a key with no matching row contributes nothing.
    """
    row_keys = df[["family", "model", "tag"]].apply(
        lambda row: ":".join(row), axis=1
    )
    selected = pd.concat([df[row_keys == key] for key in model_names], axis=0)
    st.table(format_df(selected))
67
+
68
+
69
+ # Streamlit app
70
def main():
    """Render the per-category accuracy page.

    One table is drawn per entry of ``MODEL_GROUPS`` from
    ``data/250124/overall.csv``, followed by a table of the ``DECODERS`` rows
    from ``data/250124/decoder_overall.csv``.
    """
    st.title("Interface")

    encoder_scores = pd.read_csv("data/250124/overall.csv")
    for group_name in MODEL_GROUPS:
        st.markdown(f"## {group_name} models")
        print_table_overall(encoder_scores, MODEL_GROUPS[group_name])

    # Decoder-based scorers live in their own CSV with the same column layout.
    decoder_scores = pd.read_csv("data/250124/decoder_overall.csv")
    st.markdown("## Decoder-based models")
    print_table_overall(decoder_scores, DECODERS)


if __name__ == "__main__":
    main()
pages/summary_acc_250124.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
+ import pandas as pd
4
+
5
+ import streamlit as st
6
+
7
+ st.set_page_config(layout="wide")
8
# Row keys have the form "family:model:tag" and are matched against the
# ('Model', ...) columns of the summary CSV files. List order is display order.
# NOTE: pages/overall_acc_250124.py defines the same lists; keep them in sync.
SHORT_CAPTIONS = [
    "ALIGN:align-base:coyo700m",
    "OpenCLIP:ViT-B-32:openai",
    "OpenCLIP:ViT-B-16:openai",
    "OpenCLIP:ViT-L-14:openai",
    "OpenCLIP:ViT-L-14-336:openai",
    "OpenCLIP:ViT-B-32:laion2b_s34b_b79k",
    "OpenCLIP:ViT-B-16:laion2b_s34b_b88k",
    "OpenCLIP:ViT-L-14:laion2b_s32b_b82k",
    "OpenCLIP:ViT-g-14:laion2b_s34b_b88k",
    "OpenCLIP:ViT-H-14:laion2b_s32b_b79k",
    "OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k",
    "OpenCLIP:ViT-B-16-SigLIP:webli",
    "OpenCLIP:ViT-B-16-SigLIP-384:webli",
    "OpenCLIP:ViT-L-16-SigLIP-256:webli",
    "OpenCLIP:ViT-L-16-SigLIP-384:webli",
    "OpenCLIP:ViT-SO400M-14-SigLIP:webli",
    "OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k",
    "OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k",
]

LONG_CAPTIONS = [
    "DreamLIP:dreamlip-vitb16:cc3m-long",
    "DreamLIP:dreamlip-vitb16:cc12m-long",
    "DreamLIP:dreamlip-vitb16:yfcc15m-long",
    "DreamLIP:dreamlip-vitb16:cc30m-long",
    "FLAIR:flair-vitb16:cc3m-recap",
    "FLAIR:flair-vitb16:cc12m-recap",
    "FLAIR:flair-vitb16:yfcc15m-recap",
    "FLAIR:flair-vitb16:cc30m-recap",
    "CLIPS:CLIPS-Large-14-224:recap-datacomp1b",
    "CLIPS:CLIPS-Large-14-336:recap-datacomp1b",
    "CLIPS:CLIPS-Huge-14-224:recap-datacomp1b",
    "LoTLIP:LoTLIP-ViT-B-32:lotlip100m",
    "LoTLIP:LoTLIP-ViT-B-16:lotlip100m",
    "Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b",
    "LongCLIP:longclip-vitb32:sharegpt4v-1m",
    "LongCLIP:longclip-vitb16:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14_336px:sharegpt4v-1m",
    "Jina-CLIP:jina-clip-v1:jinaai",
    "Jina-CLIP:jina-clip-v2:jinaai",
]

COMPOSITIONALITY = [
    "OpenCLIP:ViT-B-32:openai",
    "StructuredCLIP:NegCLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft",
    "StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft",
    "FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft",
]

# Rows of the decoder CSV; the tag column is the literal string "none".
DECODERS = [
    "vqascore:instructblip-flant5-xl:none",
    "vqascore:clip-flant5-xl:none",
    "vqascore:llava-v1.5-7b:none",
    "vqascore:sharegpt4v-7b:none",
    "visualgptscore:instructblip-flant5-xl:none",
    "visualgptscore:clip-flant5-xl:none",
    "visualgptscore:llava-v1.5-7b:none",
    "visualgptscore:sharegpt4v-7b:none",
]

# Section name -> row keys; sections are rendered in insertion order.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
50
+
51
+
52
def render_mi_table(df, level0_cols):
    """Build an HTML table with a two-row header for a two-level-column frame.

    Args:
        df: DataFrame whose columns form a MultiIndex (``df.xs(..., level=0)``
            is used to count the sub-columns of each top-level label).
        level0_cols: Top-level column labels in display order. Each becomes a
            header cell spanning its sub-columns; a falsy label is rendered as
            an empty cell of width 1.

    Returns:
        A string holding a ``<style>`` block followed by the ``<table>``
        markup. Cell values are inserted verbatim (no HTML escaping), so the
        frame is expected to come from a trusted source.
    """
    # The markup is emitted without leading indentation or blank lines so that
    # a Markdown renderer keeps it as a raw HTML block instead of treating
    # indented lines as a code block.
    table_style = (
        "<style>\n"
        "table {\n"
        "width: 100%;\n"
        "border-collapse: collapse;\n"
        "}\n"
        "th, td {\n"
        "border: 1px solid black;\n"
        "text-align: center;\n"
        "padding: 8px;\n"
        "}\n"
        "th {\n"
        "background-color: #262730;\n"
        "}\n"
        "</style>"
    )

    def map_val(value):
        # Anything float() accepts is shown with one decimal; other values
        # (e.g. model names) pass through unchanged. Only conversion failures
        # are caught so unrelated errors are not silently swallowed.
        try:
            return f"{float(value):.1f}"
        except (TypeError, ValueError):
            return value

    # Top header row (level 0): one cell per group, spanning its sub-columns.
    top_cells = []
    for col in level0_cols:
        colspan = len(df.xs(col, axis=1, level=0).columns) if col else 1
        top_cells.append(
            f'<th colspan="{colspan}" style="text-align: center;">{col if col else ""}</th>'
        )
    header_html = "<tr>" + "".join(top_cells) + "</tr>"

    # Second header row (level 1): one cell per actual column.
    sub_header_html = "<tr>" + "".join(
        f"<th style='text-align: center;'>{col[1] if len(col) > 1 else col[0]}</th>"
        for col in df.columns
    ) + "</tr>"

    # Data rows.
    rows_html = "".join(
        "<tr>" + "".join(f"<td>{map_val(value)}</td>" for value in row) + "</tr>"
        for _, row in df.iterrows()
    )

    # Assemble the final HTML.
    return "\n".join(
        [table_style, "<table>", header_html, sub_header_html, rows_html, "</table>"]
    ) + "\n"
107
+
108
+
109
+ def format_df(df):
110
+ cols = []
111
+ for col in df.columns:
112
+ if col in [("Model", "family"), ("Model", "model"), ("Model", "tag")]:
113
+ continue
114
+ cols.append(col)
115
+ formatted_df = df.style.format({col: "{:.1f}" for col in cols})
116
+ return formatted_df
117
+
118
+
119
def print_table(df):
    """Render ``df`` through ``render_mi_table`` and emit it as raw HTML.

    The top-level header labels are the first element of each column label,
    de-duplicated while keeping first-seen order.
    """
    # dict.fromkeys keeps insertion order, giving an ordered de-duplication.
    level0_cols = list(dict.fromkeys(label[0] for label in df.columns))
    table_html = render_mi_table(df, level0_cols)
    st.markdown(table_html, unsafe_allow_html=True)
125
+
126
+
127
def print_table_summary(df, model_names):
    """Show the rows of ``df`` listed in ``model_names`` as a two-level table.

    A row's key is its ``("Model", "family")``, ``("Model", "model")`` and
    ``("Model", "tag")`` values joined with ":". Rows are emitted in the order
    their keys appear in ``model_names``; a key with no matching row
    contributes nothing.
    """
    id_columns = [("Model", "family"), ("Model", "model"), ("Model", "tag")]
    row_keys = df[id_columns].apply(lambda row: ":".join(row), axis=1)
    selected = pd.concat([df[row_keys == key] for key in model_names], axis=0)
    # Ensure the tuple labels form an explicit MultiIndex before rendering.
    selected.columns = pd.MultiIndex.from_tuples(selected.columns)
    print_table(selected)
136
+
137
+
138
+ # Streamlit app
139
def _load_summary(path):
    """Read a summary CSV and turn its stringified-tuple headers into tuples."""
    df = pd.read_csv(path)
    # Headers are stored as text such as "('Model', 'family')";
    # ast.literal_eval parses them back into tuples without executing code.
    df.columns = [ast.literal_eval(col) for col in df.columns]
    return df


def main():
    """Render the summary accuracy page.

    One table is drawn per entry of ``MODEL_GROUPS`` from
    ``data/250124/summary.csv``, followed by a table of the ``DECODERS`` rows
    from ``data/250124/decoder_summary.csv``.
    """
    st.title("Interface")

    df = _load_summary("data/250124/summary.csv")
    for group, model_names in MODEL_GROUPS.items():
        st.markdown(f"## {group} models")
        print_table_summary(df, model_names)

    df = _load_summary("data/250124/decoder_summary.csv")
    st.markdown("## Decoder-based models")
    print_table_summary(df, DECODERS)


if __name__ == "__main__":
    main()