add relation, attribute sentences
Browse files- data/250124/decoder_overall.csv +9 -0
- data/250124/decoder_summary.csv +9 -0
- data/250124/overall.csv +46 -0
- data/250124/summary.csv +46 -0
- pages/overall_acc_250124.py +83 -0
- pages/summary_acc_250124.py +161 -0
data/250124/decoder_overall.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
|
| 2 |
+
vqascore,instructblip-flant5-xl,none,61.111111111111114,60.902255639097746,50.0,50.0,54.0268456375839,48.4375,50.0,50.0,53.05971404847409
|
| 3 |
+
vqascore,clip-flant5-xl,none,50.55555555555556,49.81203007518798,45.52238805970149,34.92647058823529,57.718120805369125,53.38541666666667,58.33333333333333,54.44444444444444,50.58721994106173
|
| 4 |
+
vqascore,llava-v1.5-7b,none,62.77777777777777,50.18796992481202,50.29850746268657,56.98529411764706,59.060402684563755,54.42708333333333,57.608695652173914,53.33333333333333,55.58488303579097
|
| 5 |
+
vqascore,sharegpt4v-7b,none,53.333333333333336,51.691729323308266,60.44776119402985,68.75,59.22818791946309,49.73958333333333,59.42028985507247,63.33333333333333,58.2430272864842
|
| 6 |
+
visualgptscore,instructblip-flant5-xl,none,30.0,24.06015037593985,34.62686567164179,50.0,24.832214765100673,27.083333333333332,38.22463768115942,50.0,34.853400228396886
|
| 7 |
+
visualgptscore,clip-flant5-xl,none,34.44444444444444,23.684210526315788,22.388059701492537,22.794117647058822,25.503355704697988,29.166666666666668,40.57971014492754,33.33333333333333,28.986737271117136
|
| 8 |
+
visualgptscore,llava-v1.5-7b,none,20.0,16.541353383458645,11.641791044776118,10.294117647058822,23.48993288590604,25.0,36.231884057971016,28.888888888888886,21.510995988507442
|
| 9 |
+
visualgptscore,sharegpt4v-7b,none,21.11111111111111,18.421052631578945,14.328358208955224,11.76470588235294,22.818791946308725,26.041666666666668,35.14492753623188,33.33333333333333,22.870493414567356
|
data/250124/decoder_summary.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
|
| 2 |
+
vqascore,instructblip-flant5-xl,none,57.56897837434751,54.66987781954887,50.0,50.0,55.503341687552215,50.616086409395976,53.05971404847409
|
| 3 |
+
vqascore,clip-flant5-xl,none,54.13683818046234,51.598723370927324,51.92786069651741,44.685457516339866,45.20411106967008,55.97032881245339,50.58721994106173
|
| 4 |
+
vqascore,llava-v1.5-7b,none,60.91909023117076,52.307526629072676,53.95360155743025,55.15931372549019,55.06238732073086,56.10737875085108,55.58488303579097
|
| 5 |
+
vqascore,sharegpt4v-7b,none,56.28076062639821,50.7156563283208,59.93402552455116,66.04166666666666,58.555705962667865,57.930348610300555,58.2430272864842
|
| 6 |
+
visualgptscore,instructblip-flant5-xl,none,27.416107382550337,25.571741854636592,36.42575167640061,50.0,34.67175401189541,35.035046444898356,34.853400228396886
|
| 7 |
+
visualgptscore,clip-flant5-xl,none,29.973900074571215,26.425438596491226,31.483884923210038,28.063725490196077,25.827708079827897,32.145766462406385,28.986737271117136
|
| 8 |
+
visualgptscore,llava-v1.5-7b,none,21.74496644295302,20.77067669172932,23.936837551373568,19.591503267973856,14.619315518823395,28.402676458191486,21.510995988507442
|
| 9 |
+
visualgptscore,sharegpt4v-7b,none,21.964951528709918,22.23135964912281,24.736642872593553,22.549019607843135,16.406306958499556,29.334679870635153,22.870493414567356
|
data/250124/overall.csv
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
|
| 2 |
+
ALIGN,align-base,coyo700m,56.66666666666667,50.0,50.0,50.0,65.1006711409396,50.0,50.0,50.0,52.720917225950785
|
| 3 |
+
CLIPS,CLIPS-Large-14-224,recap-datacomp1b,61.666666666666664,50.0,50.0,50.0,62.58389261744966,50.520833333333336,50.0,50.0,53.096424077181204
|
| 4 |
+
CLIPS,CLIPS-Large-14-336,recap-datacomp1b,58.33333333333333,49.62406015037594,50.0,50.0,60.57046979865772,50.520833333333336,50.0,50.0,52.38108707696254
|
| 5 |
+
CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,52.77777777777777,49.62406015037594,50.0,50.0,67.28187919463087,49.47916666666667,50.0,50.0,52.395360473681414
|
| 6 |
+
DreamLIP,dreamlip-vitb16,cc3m-long,58.888888888888886,50.0,50.0,50.0,59.22818791946309,50.0,50.0,50.0,52.264634601043994
|
| 7 |
+
DreamLIP,dreamlip-vitb16,cc12m-long,54.44444444444444,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,51.751025354213276
|
| 8 |
+
DreamLIP,dreamlip-vitb16,yfcc15m-long,53.33333333333333,50.0,50.0,50.0,61.57718120805369,50.0,50.0,50.0,51.86381431767337
|
| 9 |
+
DreamLIP,dreamlip-vitb16,cc30m-long,57.77777777777778,50.0,50.0,50.0,68.28859060402684,50.0,50.0,50.0,53.25829604772558
|
| 10 |
+
FLAIR,flair-vitb16,cc3m-recap,63.333333333333336,50.0,50.0,50.0,65.93959731543623,50.0,50.0,50.0,53.6591163310962
|
| 11 |
+
FLAIR,flair-vitb16,cc12m-recap,57.77777777777778,50.0,50.0,50.0,66.2751677852349,50.0,50.0,50.0,53.00661819537658
|
| 12 |
+
FLAIR,flair-vitb16,yfcc15m-recap,62.22222222222222,50.0,50.0,50.0,63.255033557046985,50.0,50.0,50.0,53.18465697240865
|
| 13 |
+
FLAIR,flair-vitb16,cc30m-recap,57.77777777777778,50.0,50.0,50.0,66.94630872483222,50.0,50.0,50.0,53.09051081282625
|
| 14 |
+
FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,61.111111111111114,50.0,50.0,50.0,61.241610738255034,50.0,50.0,50.0,52.79409023117077
|
| 15 |
+
FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,58.888888888888886,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,52.306580909768826
|
| 16 |
+
FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,65.55555555555556,50.0,50.0,50.0,58.557046979865774,50.0,50.0,50.0,53.014075316927666
|
| 17 |
+
Jina-CLIP,jina-clip-v1,jinaai,61.111111111111114,55.26315789473685,53.73134328358209,52.94117647058824,66.44295302013423,63.541666666666664,60.14492753623188,66.66666666666666,59.98037533121472
|
| 18 |
+
Jina-CLIP,jina-clip-v2,jinaai,62.77777777777778,63.34586466165413,51.64179104477612,52.5735294117647,67.11409395973155,64.84375,61.594202898550726,55.55555555555556,59.93082066372632
|
| 19 |
+
LoTLIP,LoTLIP-ViT-B-32,lotlip100m,66.66666666666666,49.81203007518797,50.0,50.0,66.77852348993288,57.8125,50.18115942028985,50.0,55.156359956509675
|
| 20 |
+
LoTLIP,LoTLIP-ViT-B-16,lotlip100m,52.22222222222223,49.81203007518797,50.0,50.0,66.10738255033557,51.5625,49.81884057971014,50.0,52.440371928431986
|
| 21 |
+
LongCLIP,longclip-vitb32,sharegpt4v-1m,48.33333333333333,52.819548872180455,48.95522388059702,50.0,54.36241610738255,54.6875,61.05072463768116,50.0,52.52609335389681
|
| 22 |
+
LongCLIP,longclip-vitb16,sharegpt4v-1m,51.111111111111114,52.067669172932334,46.865671641791046,50.0,56.04026845637584,58.333333333333336,60.14492753623188,50.0,53.07037265647194
|
| 23 |
+
LongCLIP,longclip-vitl14,sharegpt4v-1m,62.22222222222223,62.78195488721804,55.07462686567163,50.0,56.87919463087249,58.59375,57.06521739130435,50.0,56.57712074966109
|
| 24 |
+
LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,62.22222222222222,61.278195488721806,53.43283582089552,50.0,53.52348993288591,57.55208333333333,54.347826086956516,50.0,55.294581610626906
|
| 25 |
+
OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,70.0,50.0,50.0,50.0,62.75167785234899,50.0,50.0,50.0,54.09395973154362
|
| 26 |
+
OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,67.22222222222223,50.0,50.0,50.0,63.08724832214765,50.0,50.0,50.0,53.78868381804624
|
| 27 |
+
OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,59.44444444444444,50.0,50.0,50.0,61.40939597315436,50.0,50.0,50.0,52.60673005219985
|
| 28 |
+
OpenCLIP,ViT-H-14,laion2b_s32b_b79k,67.77777777777777,50.0,50.0,50.0,65.60402684563758,50.0,50.0,50.0,54.17272557792692
|
| 29 |
+
OpenCLIP,ViT-L-14,laion2b_s32b_b82k,61.111111111111114,50.0,50.0,50.0,60.90604026845638,50.0,50.0,50.0,52.75214392244594
|
| 30 |
+
OpenCLIP,ViT-B-32,laion2b_s34b_b79k,58.888888888888886,50.0,50.0,50.0,63.59060402684564,50.0,50.0,50.0,52.809936614466814
|
| 31 |
+
OpenCLIP,ViT-B-16,laion2b_s34b_b88k,63.333333333333336,50.0,50.0,50.0,58.89261744966443,50.0,50.0,50.0,52.77824384787472
|
| 32 |
+
OpenCLIP,ViT-g-14,laion2b_s34b_b88k,61.111111111111114,50.0,50.0,50.0,60.90604026845638,50.0,50.0,50.0,52.75214392244594
|
| 33 |
+
OpenCLIP,ViT-B-16,openai,61.111111111111114,50.0,50.0,50.0,59.22818791946309,50.0,50.0,50.0,52.54241237882177
|
| 34 |
+
OpenCLIP,ViT-B-32,openai,62.22222222222222,50.0,50.0,50.0,59.56375838926175,50.0,50.0,50.0,52.7232475764355
|
| 35 |
+
OpenCLIP,ViT-L-14,openai,58.888888888888886,50.0,50.0,50.0,58.22147651006712,50.0,50.0,50.0,52.1387956748695
|
| 36 |
+
OpenCLIP,ViT-L-14-336,openai,61.111111111111114,50.0,50.0,50.0,60.570469798657726,50.0,50.0,50.0,52.71019761372111
|
| 37 |
+
OpenCLIP,ViT-B-16-SigLIP,webli,50.0,50.0,50.0,50.0,60.738255033557046,50.0,50.0,50.0,51.34228187919463
|
| 38 |
+
OpenCLIP,ViT-B-16-SigLIP-384,webli,50.0,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.3003355704698
|
| 39 |
+
OpenCLIP,ViT-L-16-SigLIP-256,webli,54.44444444444444,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.855891126025355
|
| 40 |
+
OpenCLIP,ViT-L-16-SigLIP-384,webli,52.222222222222214,50.0,50.0,50.0,60.40268456375839,50.0,50.0,50.0,51.57811334824758
|
| 41 |
+
OpenCLIP,ViT-SO400M-14-SigLIP,webli,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
|
| 42 |
+
Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,60.0,52.819548872180455,50.0,50.0,63.758389261744966,57.8125,50.18115942028985,50.0,54.321449694276914
|
| 43 |
+
StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,71.11111111111111,50.0,50.0,50.0,58.22147651006712,50.0,50.0,50.0,53.66657345264728
|
| 44 |
+
StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,68.88888888888889,50.0,50.0,50.0,61.241610738255034,50.0,50.0,50.0,53.76631245339299
|
| 45 |
+
StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,68.88888888888889,50.0,50.0,50.0,60.570469798657726,50.0,50.0,50.0,53.68241983594333
|
| 46 |
+
StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,63.333333333333336,50.0,50.0,50.0,54.194630872483216,50.0,50.0,50.0,52.19099552572707
|
data/250124/summary.csv
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
|
| 2 |
+
ALIGN,align-base,coyo700m,60.883668903803134,50.0,50.0,50.0,51.66666666666667,53.7751677852349,52.720917225950785
|
| 3 |
+
CLIPS,CLIPS-Large-14-224,recap-datacomp1b,62.12527964205816,50.26041666666667,50.0,50.0,52.916666666666664,53.27618148769575,53.096424077181204
|
| 4 |
+
CLIPS,CLIPS-Large-14-336,recap-datacomp1b,59.45190156599553,50.072446741854634,50.0,50.0,51.98934837092732,52.772825782997764,52.38108707696254
|
| 5 |
+
CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,60.02982848620432,49.551613408521305,50.0,50.0,50.60045948203843,54.19026146532438,52.395360473681414
|
| 6 |
+
DreamLIP,dreamlip-vitb16,cc3m-long,59.05853840417599,50.0,50.0,50.0,52.22222222222222,52.307046979865774,52.264634601043994
|
| 7 |
+
DreamLIP,dreamlip-vitb16,cc12m-long,57.004101416853096,50.0,50.0,50.0,51.111111111111114,52.39093959731544,51.751025354213276
|
| 8 |
+
DreamLIP,dreamlip-vitb16,yfcc15m-long,57.45525727069351,50.0,50.0,50.0,50.83333333333333,52.89429530201342,51.86381431767337
|
| 9 |
+
DreamLIP,dreamlip-vitb16,cc30m-long,63.033184190902304,50.0,50.0,50.0,51.94444444444444,54.57214765100671,53.25829604772558
|
| 10 |
+
FLAIR,flair-vitb16,cc3m-recap,64.63646532438479,50.0,50.0,50.0,53.333333333333336,53.98489932885906,53.6591163310962
|
| 11 |
+
FLAIR,flair-vitb16,cc12m-recap,62.02647278150634,50.0,50.0,50.0,51.94444444444444,54.06879194630872,53.00661819537658
|
| 12 |
+
FLAIR,flair-vitb16,yfcc15m-recap,62.7386278896346,50.0,50.0,50.0,53.05555555555556,53.31375838926175,53.18465697240865
|
| 13 |
+
FLAIR,flair-vitb16,cc30m-recap,62.362043251304996,50.0,50.0,50.0,51.94444444444444,54.236577181208055,53.09051081282625
|
| 14 |
+
FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,61.176360924683074,50.0,50.0,50.0,52.77777777777778,52.810402684563755,52.79409023117077
|
| 15 |
+
FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,59.22632363907532,50.0,50.0,50.0,52.22222222222222,52.39093959731544,52.306580909768826
|
| 16 |
+
FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,62.056301267710666,50.0,50.0,50.0,53.888888888888886,52.13926174496645,53.014075316927666
|
| 17 |
+
Jina-CLIP,jina-clip-v1,jinaai,63.77703206562267,59.40241228070175,56.93813540990699,59.803921568627445,55.76169719000457,64.19905347242485,59.98037533121472
|
| 18 |
+
Jina-CLIP,jina-clip-v2,jinaai,64.94593586875466,64.09480733082707,56.61799697166342,54.064542483660134,57.58474072399318,62.27690060345945,59.93082066372632
|
| 19 |
+
LoTLIP,LoTLIP-ViT-B-32,lotlip100m,66.72259507829978,53.81226503759399,50.09057971014492,50.0,54.11967418546366,56.193045727555685,55.156359956509675
|
| 20 |
+
LoTLIP,LoTLIP-ViT-B-16,lotlip100m,59.1648023862789,50.68726503759399,49.90942028985507,50.0,50.50856307435255,54.37218078251143,52.440371928431986
|
| 21 |
+
LongCLIP,longclip-vitb32,sharegpt4v-1m,51.347874720357936,53.75352443609023,55.00297425913909,50.0,50.0270265215277,55.02516018626593,52.52609335389681
|
| 22 |
+
LongCLIP,longclip-vitb16,sharegpt4v-1m,53.57568978374348,55.200501253132835,53.505299589011464,50.0,50.01111298145862,56.12963233148526,53.07037265647194
|
| 23 |
+
LongCLIP,longclip-vitl14,sharegpt4v-1m,59.55070842654736,60.68785244360902,56.06992212848799,50.0,57.519700993777974,55.63454050554421,56.57712074966109
|
| 24 |
+
LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,57.87285607755406,59.41513941102757,53.89033095392602,50.0,56.733313382959885,53.855849838293935,55.294581610626906
|
| 25 |
+
OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,66.3758389261745,50.0,50.0,50.0,55.0,53.18791946308725,54.09395973154362
|
| 26 |
+
OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,65.15473527218494,50.0,50.0,50.0,54.30555555555556,53.27181208053691,53.78868381804624
|
| 27 |
+
OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,60.4269202087994,50.0,50.0,50.0,52.361111111111114,52.85234899328859,52.60673005219985
|
| 28 |
+
OpenCLIP,ViT-H-14,laion2b_s32b_b79k,66.69090231170767,50.0,50.0,50.0,54.44444444444444,53.901006711409394,54.17272557792692
|
| 29 |
+
OpenCLIP,ViT-L-14,laion2b_s32b_b82k,61.00857568978375,50.0,50.0,50.0,52.77777777777778,52.72651006711409,52.75214392244594
|
| 30 |
+
OpenCLIP,ViT-B-32,laion2b_s34b_b79k,61.23974645786726,50.0,50.0,50.0,52.22222222222222,53.39765100671141,52.809936614466814
|
| 31 |
+
OpenCLIP,ViT-B-16,laion2b_s34b_b88k,61.11297539149888,50.0,50.0,50.0,53.333333333333336,52.22315436241611,52.77824384787472
|
| 32 |
+
OpenCLIP,ViT-g-14,laion2b_s34b_b88k,61.00857568978375,50.0,50.0,50.0,52.77777777777778,52.72651006711409,52.75214392244594
|
| 33 |
+
OpenCLIP,ViT-B-16,openai,60.169649515287105,50.0,50.0,50.0,52.77777777777778,52.307046979865774,52.54241237882177
|
| 34 |
+
OpenCLIP,ViT-B-32,openai,60.89299030574199,50.0,50.0,50.0,53.05555555555556,52.39093959731544,52.7232475764355
|
| 35 |
+
OpenCLIP,ViT-L-14,openai,58.555182699478,50.0,50.0,50.0,52.22222222222222,52.055369127516784,52.1387956748695
|
| 36 |
+
OpenCLIP,ViT-L-14-336,openai,60.84079045488442,50.0,50.0,50.0,52.77777777777778,52.64261744966443,52.71019761372111
|
| 37 |
+
OpenCLIP,ViT-B-16-SigLIP,webli,55.36912751677852,50.0,50.0,50.0,50.0,52.68456375838926,51.34228187919463
|
| 38 |
+
OpenCLIP,ViT-B-16-SigLIP-384,webli,55.20134228187919,50.0,50.0,50.0,50.0,52.600671140939596,51.3003355704698
|
| 39 |
+
OpenCLIP,ViT-L-16-SigLIP-256,webli,57.42356450410142,50.0,50.0,50.0,51.111111111111114,52.600671140939596,51.855891126025355
|
| 40 |
+
OpenCLIP,ViT-L-16-SigLIP-384,webli,56.31245339299031,50.0,50.0,50.0,50.55555555555556,52.600671140939596,51.57811334824758
|
| 41 |
+
OpenCLIP,ViT-SO400M-14-SigLIP,webli,50.0,50.0,50.0,50.0,50.0,50.0,50.0
|
| 42 |
+
Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,61.87919463087248,55.31602443609023,50.09057971014492,50.0,53.204887218045116,55.438012170508706,54.321449694276914
|
| 43 |
+
StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,64.66629381058911,50.0,50.0,50.0,55.27777777777778,52.055369127516784,53.66657345264728
|
| 44 |
+
StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,65.06524981357197,50.0,50.0,50.0,54.72222222222222,52.810402684563755,53.76631245339299
|
| 45 |
+
StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,64.72967934377331,50.0,50.0,50.0,54.72222222222222,52.64261744966443,53.68241983594333
|
| 46 |
+
StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,58.76398210290827,50.0,50.0,50.0,53.333333333333336,51.04865771812081,52.19099552572707
|
pages/overall_acc_250124.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
st.set_page_config(layout="wide")
|
| 6 |
+
# Model identifiers are written as "family:model:tag"; print_table_overall
# builds the same key by joining those three CSV columns with ":".
SHORT_CAPTIONS = [
    'ALIGN:align-base:coyo700m',
    'OpenCLIP:ViT-B-32:openai',
    'OpenCLIP:ViT-B-16:openai',
    'OpenCLIP:ViT-L-14:openai',
    'OpenCLIP:ViT-L-14-336:openai',
    'OpenCLIP:ViT-B-32:laion2b_s34b_b79k',
    'OpenCLIP:ViT-B-16:laion2b_s34b_b88k',
    'OpenCLIP:ViT-L-14:laion2b_s32b_b82k',
    'OpenCLIP:ViT-g-14:laion2b_s34b_b88k',
    'OpenCLIP:ViT-H-14:laion2b_s32b_b79k',
    'OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k',
    'OpenCLIP:ViT-B-16-SigLIP:webli',
    'OpenCLIP:ViT-B-16-SigLIP-384:webli',
    'OpenCLIP:ViT-L-16-SigLIP-256:webli',
    'OpenCLIP:ViT-L-16-SigLIP-384:webli',
    'OpenCLIP:ViT-SO400M-14-SigLIP:webli',
    'OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k',
    'OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k',
]

LONG_CAPTIONS = [
    'DreamLIP:dreamlip-vitb16:cc3m-long',
    'DreamLIP:dreamlip-vitb16:cc12m-long',
    'DreamLIP:dreamlip-vitb16:yfcc15m-long',
    'DreamLIP:dreamlip-vitb16:cc30m-long',
    'FLAIR:flair-vitb16:cc3m-recap',
    'FLAIR:flair-vitb16:cc12m-recap',
    'FLAIR:flair-vitb16:yfcc15m-recap',
    'FLAIR:flair-vitb16:cc30m-recap',
    'CLIPS:CLIPS-Large-14-224:recap-datacomp1b',
    'CLIPS:CLIPS-Large-14-336:recap-datacomp1b',
    'CLIPS:CLIPS-Huge-14-224:recap-datacomp1b',
    'LoTLIP:LoTLIP-ViT-B-32:lotlip100m',
    'LoTLIP:LoTLIP-ViT-B-16:lotlip100m',
    'Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b',
    'LongCLIP:longclip-vitb32:sharegpt4v-1m',
    'LongCLIP:longclip-vitb16:sharegpt4v-1m',
    'LongCLIP:longclip-vitl14:sharegpt4v-1m',
    'LongCLIP:longclip-vitl14_336px:sharegpt4v-1m',
    'Jina-CLIP:jina-clip-v1:jinaai',
    'Jina-CLIP:jina-clip-v2:jinaai',
]

COMPOSITIONALITY = [
    'OpenCLIP:ViT-B-32:openai',
    'StructuredCLIP:NegCLIP-ViT-B-32:coco-ft',
    'StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft',
    'StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft',
    'StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft',
    'FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft',
    'FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft',
    'FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft',
]

# Rows of decoder_overall.csv; main() renders these separately, so they are
# not part of MODEL_GROUPS.
DECODERS = [
    'vqascore:instructblip-flant5-xl:none',
    'vqascore:clip-flant5-xl:none',
    'vqascore:llava-v1.5-7b:none',
    'vqascore:sharegpt4v-7b:none',
    'visualgptscore:instructblip-flant5-xl:none',
    'visualgptscore:clip-flant5-xl:none',
    'visualgptscore:llava-v1.5-7b:none',
    'visualgptscore:sharegpt4v-7b:none',
]

# Section name -> ordered model identifiers; main() emits one table per entry,
# in this insertion order.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def format_df(df):
    """Return ``df.style`` with one-decimal formatting on the score columns.

    Every column of ``df`` except ``family``, ``model`` and ``tag`` is mapped
    to the ``"{:.1f}"`` format string; the three identifier columns are left
    without a formatter.
    """
    identifier_columns = ("family", "model", "tag")
    one_decimal = {
        name: "{:.1f}" for name in df.columns if name not in identifier_columns
    }
    return df.style.format(one_decimal)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def print_table_overall(df, model_names):
    """Show the rows of ``df`` that match ``model_names`` as a Streamlit table.

    Each row is keyed by its ``family``, ``model`` and ``tag`` values joined
    with ``":"``. Rows are emitted in the order of ``model_names``; a name
    with no matching row contributes nothing.
    """

    def _joined_id(row):
        return ":".join(row)

    row_keys = df[["family", "model", "tag"]].apply(_joined_id, axis=1)
    # One boolean-mask selection per requested name keeps the caller's order.
    picked = pd.concat([df[row_keys == wanted] for wanted in model_names], axis=0)
    st.table(format_df(picked))
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# Streamlit app
|
| 70 |
+
def main():
    """Render the page: one table per MODEL_GROUPS entry, then the decoder table."""
    st.title("Interface")

    encoder_scores = pd.read_csv("data/250124/overall.csv")
    for group in MODEL_GROUPS:
        st.markdown(f"## {group} models")
        print_table_overall(encoder_scores, MODEL_GROUPS[group])

    # Decoder-based models come from their own CSV file.
    decoder_scores = pd.read_csv("data/250124/decoder_overall.csv")
    st.markdown("## Decoder-based models")
    print_table_overall(decoder_scores, DECODERS)


if __name__ == "__main__":
    main()
|
pages/summary_acc_250124.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
import streamlit as st
|
| 6 |
+
|
| 7 |
+
st.set_page_config(layout="wide")
|
| 8 |
+
# Model identifiers are written as "family:model:tag"; print_table_summary
# builds the same key by joining the three ("Model", ...) columns with ":".
SHORT_CAPTIONS = [
    'ALIGN:align-base:coyo700m',
    'OpenCLIP:ViT-B-32:openai',
    'OpenCLIP:ViT-B-16:openai',
    'OpenCLIP:ViT-L-14:openai',
    'OpenCLIP:ViT-L-14-336:openai',
    'OpenCLIP:ViT-B-32:laion2b_s34b_b79k',
    'OpenCLIP:ViT-B-16:laion2b_s34b_b88k',
    'OpenCLIP:ViT-L-14:laion2b_s32b_b82k',
    'OpenCLIP:ViT-g-14:laion2b_s34b_b88k',
    'OpenCLIP:ViT-H-14:laion2b_s32b_b79k',
    'OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k',
    'OpenCLIP:ViT-B-16-SigLIP:webli',
    'OpenCLIP:ViT-B-16-SigLIP-384:webli',
    'OpenCLIP:ViT-L-16-SigLIP-256:webli',
    'OpenCLIP:ViT-L-16-SigLIP-384:webli',
    'OpenCLIP:ViT-SO400M-14-SigLIP:webli',
    'OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k',
    'OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k',
]

LONG_CAPTIONS = [
    'DreamLIP:dreamlip-vitb16:cc3m-long',
    'DreamLIP:dreamlip-vitb16:cc12m-long',
    'DreamLIP:dreamlip-vitb16:yfcc15m-long',
    'DreamLIP:dreamlip-vitb16:cc30m-long',
    'FLAIR:flair-vitb16:cc3m-recap',
    'FLAIR:flair-vitb16:cc12m-recap',
    'FLAIR:flair-vitb16:yfcc15m-recap',
    'FLAIR:flair-vitb16:cc30m-recap',
    'CLIPS:CLIPS-Large-14-224:recap-datacomp1b',
    'CLIPS:CLIPS-Large-14-336:recap-datacomp1b',
    'CLIPS:CLIPS-Huge-14-224:recap-datacomp1b',
    'LoTLIP:LoTLIP-ViT-B-32:lotlip100m',
    'LoTLIP:LoTLIP-ViT-B-16:lotlip100m',
    'Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b',
    'LongCLIP:longclip-vitb32:sharegpt4v-1m',
    'LongCLIP:longclip-vitb16:sharegpt4v-1m',
    'LongCLIP:longclip-vitl14:sharegpt4v-1m',
    'LongCLIP:longclip-vitl14_336px:sharegpt4v-1m',
    'Jina-CLIP:jina-clip-v1:jinaai',
    'Jina-CLIP:jina-clip-v2:jinaai',
]

COMPOSITIONALITY = [
    'OpenCLIP:ViT-B-32:openai',
    'StructuredCLIP:NegCLIP-ViT-B-32:coco-ft',
    'StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft',
    'StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft',
    'StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft',
    'FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft',
    'FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft',
    'FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft',
]

# Rows of decoder_summary.csv; main() renders these separately, so they are
# not part of MODEL_GROUPS.
DECODERS = [
    'vqascore:instructblip-flant5-xl:none',
    'vqascore:clip-flant5-xl:none',
    'vqascore:llava-v1.5-7b:none',
    'vqascore:sharegpt4v-7b:none',
    'visualgptscore:instructblip-flant5-xl:none',
    'visualgptscore:clip-flant5-xl:none',
    'visualgptscore:llava-v1.5-7b:none',
    'visualgptscore:sharegpt4v-7b:none',
]

# Section name -> ordered model identifiers; main() emits one table per entry,
# in this insertion order.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def render_mi_table(df, level0_cols):
    """Build an HTML table string for a DataFrame with two-level column labels.

    The table has two header rows: the first shows each entry of
    ``level0_cols`` spanning as many columns as ``df`` has under that
    top-level label, the second shows the second element of every column
    label. Data cells that ``float()`` can parse are shown with one decimal;
    all other cells are emitted unchanged.

    Args:
        df: DataFrame whose column labels are tuples; it is queried with
            ``df.xs(label, axis=1, level=0)``, so its columns must support
            level-based selection.
        level0_cols: Top-level labels in display order. A falsy label gets an
            empty header cell with ``colspan`` 1.

    Returns:
        The ``<style>`` block followed by the ``<table>`` markup, as one string.

    Raises:
        KeyError: If a truthy entry of ``level0_cols`` is not a top-level
            column label of ``df`` (raised by ``df.xs``).
    """
    # CSS definition. Lines are emitted without leading indentation.
    table_style = "\n".join((
        "",
        "<style>",
        "table {",
        "width: 100%;",
        "border-collapse: collapse;",
        "}",
        "th, td {",
        "border: 1px solid black;",
        "text-align: center;",
        "padding: 8px;",
        "}",
        "th {",
        "background-color: #262730;",
        "}",
        "</style>",
        "",
    ))

    # Upper header row (column level 0).
    header_cells = []
    for col in level0_cols:
        colspan = len(df.xs(col, axis=1, level=0).columns) if col else 1
        label = col if col else ""
        header_cells.append(
            f'<th colspan="{colspan}" style="text-align: center;">{label}</th>'
        )
    header_html = "<tr>" + "".join(header_cells) + "</tr>"

    # Lower header row (column level 1, or level 0 for one-element labels).
    sub_header_html = "<tr>" + "".join(
        f"<th style='text-align: center;'>{col[1] if len(col) > 1 else col[0]}</th>"
        for col in df.columns
    ) + "</tr>"

    def map_val(value):
        """Format numeric-looking values with one decimal; pass others through."""
        try:
            return f"{float(value):.1f}"
        # Only the failures float() itself raises are treated as "not a
        # number"; a bare except would also swallow KeyboardInterrupt.
        except (TypeError, ValueError, OverflowError):
            return value

    # Data rows: one <tr> per DataFrame row, joined once instead of
    # repeatedly extending a string.
    rows_html = "".join(
        "<tr>" + "".join(f"<td>{map_val(value)}</td>" for value in row) + "</tr>"
        for _, row in df.iterrows()
    )

    # Assemble the final markup, one piece per line.
    return "\n".join((
        "",
        table_style,
        "<table>",
        header_html,
        sub_header_html,
        rows_html,
        "</table>",
        "",
    ))
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def format_df(df):
    """Return ``df.style`` with one-decimal formatting on the score columns.

    Every column of ``df`` except ``("Model", "family")``, ``("Model", "model")``
    and ``("Model", "tag")`` is mapped to the ``"{:.1f}"`` format string.
    """
    identifier_columns = (("Model", "family"), ("Model", "model"), ("Model", "tag"))
    one_decimal = {
        label: "{:.1f}" for label in df.columns if label not in identifier_columns
    }
    return df.style.format(one_decimal)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def print_table(df):
    """Render ``df`` through ``render_mi_table`` and emit it via ``st.markdown``.

    The top-level header labels are the first elements of ``df``'s column
    labels, de-duplicated in first-seen order.
    """
    # dict.fromkeys keeps the first occurrence of each label, in order.
    top_level = list(dict.fromkeys(label[0] for label in df.columns))
    table_markup = render_mi_table(df, top_level)
    st.markdown(table_markup, unsafe_allow_html=True)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def print_table_summary(df, model_names):
    """Show the rows of ``df`` that match ``model_names`` as a two-level HTML table.

    Each row is keyed by its ``("Model", "family")``, ``("Model", "model")``
    and ``("Model", "tag")`` values joined with ``":"``. Rows are emitted in
    the order of ``model_names``; a name with no matching row contributes
    nothing.
    """
    id_columns = [("Model", "family"), ("Model", "model"), ("Model", "tag")]

    def _joined_id(row):
        return ":".join(row)

    row_keys = df[id_columns].apply(_joined_id, axis=1)
    # One boolean-mask selection per requested name keeps the caller's order.
    picked = pd.concat([df[row_keys == wanted] for wanted in model_names], axis=0)
    # render_mi_table selects by column level, so the labels must be a MultiIndex.
    picked.columns = pd.MultiIndex.from_tuples(picked.columns)
    print_table(picked)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# Streamlit app
|
| 139 |
+
def _read_summary(path):
    """Read a summary CSV and turn its stringified tuple headers into tuples.

    The CSV headers are literals such as ``"('Model', 'family')"``;
    ``ast.literal_eval`` parses each one back into a tuple without executing
    arbitrary code.
    """
    df = pd.read_csv(path)
    df.columns = [ast.literal_eval(col) for col in df.columns]
    return df


def main():
    """Render the page: one table per MODEL_GROUPS entry, then the decoder table."""
    st.title("Interface")

    df = _read_summary("data/250124/summary.csv")
    for group, model_names in MODEL_GROUPS.items():
        st.markdown(f"## {group} models")
        print_table_summary(df, model_names)

    # Decoder-based models come from their own CSV file.
    df = _read_summary("data/250124/decoder_summary.csv")
    st.markdown("## Decoder-based models")
    print_table_summary(df, DECODERS)


if __name__ == "__main__":
    main()
|