upload
Browse files - pages/summary_acc_250116.py (+21 -4)
pages/summary_acc_250116.py
CHANGED
|
@@ -27,10 +27,10 @@ LONG_CAPTIONS = [
|
|
| 27 |
'Jina-CLIP:jina-clip-v1:jinaai', 'Jina-CLIP:jina-clip-v2:jinaai'
|
| 28 |
]
|
| 29 |
COMPOSITIONALITY = [
|
| 30 |
-
'
|
| 31 |
-
'StructuredCLIP:
|
| 32 |
-
'
|
| 33 |
-
'FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft'
|
| 34 |
]
|
| 35 |
|
| 36 |
MODEL_GROUPS = {
|
|
@@ -133,6 +133,23 @@ def main():
|
|
| 133 |
df.columns = [ast.literal_eval(col) for col in df.columns]
|
| 134 |
for group, model_names in MODEL_GROUPS.items():
|
| 135 |
st.markdown(f"## {group} models")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
get_model_key_from_df(df, model_names)
|
| 137 |
|
| 138 |
|
|
|
|
| 27 |
'Jina-CLIP:jina-clip-v1:jinaai', 'Jina-CLIP:jina-clip-v2:jinaai'
|
| 28 |
]
|
| 29 |
COMPOSITIONALITY = [
|
| 30 |
+
'OpenCLIP:ViT-B-32:openai', 'StructuredCLIP:NegCLIP-ViT-B-32:coco-ft',
|
| 31 |
+
'StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft', 'StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft',
|
| 32 |
+
'StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft', 'FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft',
|
| 33 |
+
'FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft', 'FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft'
|
| 34 |
]
|
| 35 |
|
| 36 |
MODEL_GROUPS = {
|
|
|
|
| 133 |
df.columns = [ast.literal_eval(col) for col in df.columns]
|
| 134 |
for group, model_names in MODEL_GROUPS.items():
|
| 135 |
st.markdown(f"## {group} models")
|
| 136 |
+
if group == "short_captions":
|
| 137 |
+
st.markdown(
|
| 138 |
+
"- **Length group**: 이미 short group부터, 80<(Num_tokens)<120. 중간에 문장 더해졌으면 60-70%정도 맞추고, 끝에 문장 더해졌으면 애초에 added sentence encoding 불가 -> accuracy 0%"
|
| 139 |
+
)
|
| 140 |
+
st.markdown(
|
| 141 |
+
"- **neg_target**: description의 끝 (=background)에 sentence 더해진 경우 accuracy 0%"
|
| 142 |
+
)
|
| 143 |
+
st.markdown("- **neg_type**: contradictory sentence가 모델 입장에서 맞추기 더 어려움")
|
| 144 |
+
|
| 145 |
+
if group == "long_captions":
|
| 146 |
+
st.markdown(
|
| 147 |
+
"- **Length group**: 모델의 context length에 성능 심하게 dependent함. DreamLIP: 77, CLIPS: 80, LoTLIP: 128, Recap-CLIP: 128, LongCLIP: 248, Jina-CLIP: 512"
|
| 148 |
+
)
|
| 149 |
+
st.markdown("- **neg_target**: 여전히 background level에서 sentence 더해진게 전반적으로 어려움")
|
| 150 |
+
st.markdown("- **neg_type**: contradictory sentence가 모델 입장에서 맞추기 더 어려움")
|
| 151 |
+
if group == "compositionality":
|
| 152 |
+
st.markdown("- context length 77의 한계. Hard Negative Caption으로 Fine-tuning 하면 일부 좋아짐")
|
| 153 |
get_model_key_from_df(df, model_names)
|
| 154 |
|
| 155 |
|