Spaces:
Sleeping
Sleeping
Create new file
Browse files- apps/sdg.py +63 -0
apps/sdg.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import plotly.express as px
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
import umap.umap_ as umap
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Page heading shown at the top of the Streamlit app.
st.title("SDG Embedding Visualisation")

# Load the sentence-transformer used to embed the documents; the spinner
# stays visible while the model is being constructed.
with st.spinner("👑 load language model (sentence transformer)"):
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)
|
| 13 |
+
|
| 14 |
+
# Download the OSDG community dataset and keep only the rows that pass
# both filters below.
with st.spinner("👑 load sdg data"):
    # The file is tab-separated even though its extension is .csv.
    df_osdg = pd.read_csv(
        'https://zenodo.org/record/5550238/files/osdg-community-dataset-v21-09-30.csv',
        sep='\t',
    )
    # Keep rows whose 'agreement' value is above 0.95 ...
    df_osdg = df_osdg[df_osdg['agreement'] > .95]
    # ... and whose 'labels_positive' value is greater than 3.
    df_osdg = df_osdg[df_osdg['labels_positive'] > 3]
    # Disabled in the original; presumably a row cap for quick experiments.
    # df_osdg = df_osdg[:1000]
|
| 19 |
+
|
| 20 |
+
# Maps the integer found in the dataset's 'sdg' column to a display label.
# Key 0 is the fallback label for "no category".
_lab_dict = {
    0: 'no_cat',
    1: 'SDG 1 - No poverty',
    2: 'SDG 2 - Zero hunger',
    3: 'SDG 3 - Good health and well-being',
    4: 'SDG 4 - Quality education',
    5: 'SDG 5 - Gender equality',
    6: 'SDG 6 - Clean water and sanitation',
    7: 'SDG 7 - Affordable and clean energy',
    8: 'SDG 8 - Decent work and economic growth',
    9: 'SDG 9 - Industry, Innovation and Infrastructure',
    10: 'SDG 10 - Reduced inequality',
    11: 'SDG 11 - Sustainable cities and communities',
    12: 'SDG 12 - Responsible consumption and production',
    13: 'SDG 13 - Climate action',
    14: 'SDG 14 - Life below water',
    15: 'SDG 15 - Life on land',
    16: 'SDG 16 - Peace, justice and strong institutions',
    17: 'SDG 17 - Partnership for the goals',
}
|
| 38 |
+
|
| 39 |
+
# NOTE(review): the original indentation is not recoverable here; these
# statements are kept under the data-loading spinner on the assumption that
# they belonged to it (encoding is presumably the slow part) — confirm.
with st.spinner("👑 load sdg data"):
    # Display label for every remaining row; raises KeyError if the 'sdg'
    # column holds a value that is not a key of _lab_dict.
    labels = [_lab_dict[lab] for lab in df_osdg['sdg']]
    # Disabled in the original:
    # keys = list(df_osdg['keys'])
    docs = list(df_osdg['text'])
    # One embedding per document, in the same order as `docs` and `labels`.
    docs_embeddings = model.encode(docs)
|
| 43 |
+
|
| 44 |
+
# Project the document embeddings down to three dimensions with UMAP.
with st.spinner("👑 prepare visualisation"):
    n_neighbors = 15
    n_components = 3    # three output dimensions for the 3-D scatter plot
    random_state = 42   # fixed seed passed to UMAP
    umap_model = umap.UMAP(
        n_neighbors=n_neighbors,
        n_components=n_components,
        metric='cosine',
        random_state=random_state,
    )
    # Fit and project in one call: the data being projected is exactly the
    # data being fitted, so a separate transform() pass is redundant.
    # umap_model is still the fitted estimator afterwards.
    docs_umap = umap_model.fit_transform(docs_embeddings)
|
| 55 |
+
|
| 56 |
+
# Draw the 3-D scatter plot of the projected documents, coloured by label.
with st.spinner("👑 create visualisation"):
    fig = px.scatter_3d(
        docs_umap, x=0, y=1, z=2,   # columns 0/1/2 of the UMAP output
        color=labels,
        opacity=.5,
    )  # hover_data=[keys] was disabled in the original
    # Hide all three axes of the scene.
    fig.update_scenes(xaxis_visible=False, yaxis_visible=False, zaxis_visible=False)
    fig.update_traces(marker_size=4)
    st.plotly_chart(fig)
|