Spaces:
Sleeping
Sleeping
| import plotly.express as px | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer | |
| from huggingface_hub import hf_hub_url, cached_download | |
| import umap.umap_ as umap | |
| import pandas as pd | |
| import os | |
| import joblib | |
| import pkg_resources | |
def init_models():
    """Load the sentence encoder and the serialized UMAP model.

    Returns:
        tuple: ``(model, umap_model)`` where ``model`` is the
        ``SentenceTransformer`` for ``all-MiniLM-L6-v2`` and ``umap_model``
        is whatever object is stored in the ``.sav`` file fetched from the
        Hugging Face Hub (presumably a fitted UMAP reducer -- the caller
        only invokes ``.transform`` on it; confirm against the Hub repo).
    """
    # Imported locally so this function stays self-contained regardless of
    # which huggingface_hub names the module-level import line pulls in.
    from huggingface_hub import hf_hub_download

    encoder_name = 'sentence-transformers/all-MiniLM-L6-v2'
    model = SentenceTransformer(encoder_name)

    repo_id = "peter2000/umap_embed_3d_all-MiniLM-L6-v2"
    filename = "umap_embed_3d_all-MiniLM-L6-v2.sav"

    # hf_hub_download resolves the file URL and caches the download in one
    # call; it replaces cached_download(hf_hub_url(...)), which is deprecated
    # and no longer available in recent huggingface_hub releases.
    umap_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # NOTE(security): joblib.load un-pickles the file, which can execute
    # arbitrary code. Only keep this if the Hub repository above is trusted.
    umap_model = joblib.load(umap_path)
    return model, umap_model
def app():
    """Render the "Text Embedder" page.

    Shows a title and an "about" panel, then a text input plus a category
    selector. When the "Embed" button is pressed, the entered text and
    category are appended to the lists kept in ``st.session_state``, every
    stored text is encoded (prefixed with "The book is about "), projected
    with the UMAP model and drawn as a 3D scatter plot.

    Reads ``st.session_state['embed_list']`` and
    ``st.session_state['cat_list']``; both must already exist when this
    function runs (a missing key raises ``KeyError``).
    """
    with st.container():
        st.markdown("<h1 style='text-align: center; \
                      color: black;'> Text Embedder</h1>",
                    unsafe_allow_html=True)
        st.write(' ')
        st.write(' ')

    with st.expander("ℹ️ - About this app", expanded=True):
        st.write(
            """
            Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
            The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384-dimensional dense vector space This is normally used for tasks like clustering or semantic search, but in this case, we use it to place your text to a 3D map. Before plotting, the dimension needs to be reduced to three so we can actually plot it, but preserve as much information as possible. For this, we use a technology called umap. The sentence transformer is context-sensitive and works best with whole sentences, to account for that we extend your text with "The book is about <text>".
            Simply put in your text and press EMBED, your examples will add up. You can use the category for different coloring.
            """)
        st.markdown("")

    # Accumulated inputs from earlier runs of this session.
    word_to_embed_list = st.session_state['embed_list']
    cat_list = st.session_state['cat_list']

    with st.container():
        col1, col2 = st.columns(2)
        with col1:
            word_to_embed = st.text_input(
                "Please enter your text here and we will embed it for you.",
                value="",
            )
        with col2:
            cat = st.selectbox('Category', ('1', '2', '3', '4', '5'))

        if st.button("Embed"):
            with st.spinner("👑 Embedding your input"):
                model, umap_model = init_models()

                word_to_embed_list.append(word_to_embed)
                st.session_state['embed_list'] = word_to_embed_list

                cat_list.append(cat)
                # Write back under the same key that was read above; the
                # previous key had a trailing space ('cat_list ') and so
                # created a separate, unused session-state entry.
                st.session_state['cat_list'] = cat_list

                # The encoder is given whole sentences, as described in the
                # "about" text shown to the user.
                phrase_to_embed = [
                    "The book is about " + wte for wte in word_to_embed_list
                ]
                examples_embeddings = model.encode(phrase_to_embed)
                examples_umap = umap_model.transform(examples_embeddings)

            with st.spinner("👑 create visualisation"):
                # The first entry of each list is skipped when plotting --
                # presumably a placeholder seeded where the session state is
                # initialised; verify against the caller.
                fig = px.scatter_3d(
                    examples_umap[1:], x=0, y=1, z=2,
                    color=cat_list[1:],
                    opacity=.7,
                    hover_data=[word_to_embed_list[1:]],
                )
                fig.update_scenes(
                    xaxis_visible=False,
                    yaxis_visible=False,
                    zaxis_visible=False,
                )
                fig.update_traces(marker_size=4)
                st.plotly_chart(fig)