Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
da1c3d0
1
Parent(s):
867722f
added fuzzy search feature
Browse files
- ProtHGT_app.py +13 -18
- requirements.txt +2 -1
ProtHGT_app.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import streamlit as st
|
| 3 |
-
import
|
| 4 |
-
import streamlit.components.v1 as components
|
| 5 |
-
import pandas as pd
|
| 6 |
-
|
| 7 |
-
|
| 8 |
|
| 9 |
# with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
|
| 10 |
# # Create a temporary placeholder for the message
|
|
@@ -92,26 +88,25 @@ with st.sidebar:
|
|
| 92 |
)
|
| 93 |
|
| 94 |
if selection_method == "Search proteins":
|
| 95 |
-
#
|
| 96 |
-
st.
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
selected_proteins = st.multiselect(
|
| 106 |
-
"Select
|
| 107 |
-
options=
|
| 108 |
placeholder="Start typing to search...",
|
| 109 |
max_selections=1000
|
| 110 |
)
|
| 111 |
|
| 112 |
if selected_proteins:
|
| 113 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
| 114 |
-
|
| 115 |
else:
|
| 116 |
uploaded_file = st.file_uploader(
|
| 117 |
"Upload a text file with UniProt IDs (one per line, max 1000)*",
|
|
|
|
| 1 |
import os
|
| 2 |
import streamlit as st
|
| 3 |
+
from rapidfuzz import process
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# with st.spinner("Initializing the environment... This may take up to 10 minutes at the start of each session."):
|
| 6 |
# # Create a temporary placeholder for the message
|
|
|
|
| 88 |
)
|
| 89 |
|
| 90 |
if selection_method == "Search proteins":
|
| 91 |
+
# User enters search term
|
| 92 |
+
search_query = st.text_input("Start typing a protein ID (at least 3 characters)", "")
|
| 93 |
+
|
| 94 |
+
# Apply fuzzy search only if query length is >= 3
|
| 95 |
+
filtered_proteins = []
|
| 96 |
+
if len(search_query) >= 3:
|
| 97 |
+
filtered_proteins = [match[0] for match in process.extract(search_query, available_proteins, limit=50)] # Show top 50 matches
|
| 98 |
+
|
| 99 |
+
# Multi-select for filtered results
|
|
|
|
| 100 |
selected_proteins = st.multiselect(
|
| 101 |
+
"Select proteins from search results",
|
| 102 |
+
options=filtered_proteins,
|
| 103 |
placeholder="Start typing to search...",
|
| 104 |
max_selections=1000
|
| 105 |
)
|
| 106 |
|
| 107 |
if selected_proteins:
|
| 108 |
st.write(f"Selected {len(selected_proteins)} proteins")
|
| 109 |
+
|
| 110 |
else:
|
| 111 |
uploaded_file = st.file_uploader(
|
| 112 |
"Upload a text file with UniProt IDs (one per line, max 1000)*",
|
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ torch==1.12.1+cpu
|
|
| 6 |
torch_sparse==0.6.15
|
| 7 |
torch_scatter==2.1.0
|
| 8 |
torch_geometric==2.2.0
|
| 9 |
-
gdown
|
|
|
|
|
|
| 6 |
torch_sparse==0.6.15
|
| 7 |
torch_scatter==2.1.0
|
| 8 |
torch_geometric==2.2.0
|
| 9 |
+
gdown
|
| 10 |
+
rapidfuzz
|