r3gm committed (verified)
Commit 699386e · 1 Parent(s): c176252

Create app.py

Files changed (1)
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
import pandas as pd
import sqlite3
import gradio as gr
import unicodedata
import re
import ast
import requests

database_url = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
database_path = "database.csv"
description = "This app digs through Hugging Face’s public zip files hunting for RVC models… and occasionally brings back random stuff that has nothing to do with them. Don’t worry though—the best RVC matches are always shown first, because we like to pretend we’re organized."


def clean_file_url(val):
    # If missing
    if pd.isna(val):
        return ""

    # If it's already a list (e.g. from JSON/df directly)
    if isinstance(val, list):
        return ", ".join(map(str, val))

    # If it's a string like '["a","b"]'
    if isinstance(val, str) and val.strip().startswith("[") and val.strip().endswith("]"):
        try:
            parsed = ast.literal_eval(val)
            if isinstance(parsed, list):
                return ", ".join(map(str, parsed))
        except Exception:
            return val  # fallback: leave as-is

    # Otherwise, return as-is
    return str(val)


def normalize(text: str) -> str:
    if pd.isna(text):
        return ""
    # Convert to lowercase
    text = text.lower()
    # Remove accents
    text = ''.join(
        c for c in unicodedata.normalize('NFD', text)
        if unicodedata.category(c) != 'Mn'
    )
    # Replace separators with space
    return re.sub(r"[+()\-_/.]", " ", text)


def search_files(query: str):
    if not query.strip():
        # Keep the return type consistent with the gr.HTML output below
        return "<p>Empty query</p>"

    # Escape single quotes so user input cannot break the SQL string built below
    keywords = [k.replace("'", "''") for k in normalize(query).split()]

    # Whole-word keyword matches rank above plain substring matches
    whole_conditions = " AND ".join([
        f"(FILENAME_NORM LIKE '% {k} %' OR FILENAME_NORM LIKE '{k} %' OR FILENAME_NORM LIKE '% {k}' OR FILENAME_NORM = '{k}')"
        for k in keywords
    ])
    partial_conditions = " AND ".join([f"FILENAME_NORM LIKE '%{k}%'" for k in keywords])

    sql = f"""
        SELECT *,
               CASE WHEN {whole_conditions} THEN 1 ELSE 0 END AS whole_match
        FROM files
        WHERE {partial_conditions}
        ORDER BY whole_match DESC, orig_index ASC;
    """

    df = pd.read_sql(sql, conn)

    if df.empty:
        return "<p>No matches found</p>"

    df_subset = df.head(250)  # limit to 250 results

    rows = []
    for i, row in enumerate(df_subset.itertuples(index=False)):
        filename = row.FILENAME
        url = row.PARSED_URL
        model_id = row.MODEL_ID

        rows.append(f"""
        <tr>
            <td>{filename}</td>
            <td>
                <input type="text" value="{url}" id="copytext{i}" readonly
                       style="width:300px; padding:4px; border-radius:6px; border:1px solid #666;
                              background-color:var(--block-background-fill);
                              color:var(--body-text-color);" />
                <button style="margin-left:5px; padding:4px 8px; border-radius:6px;
                               background-color:var(--button-primary-background-fill);
                               color:var(--button-primary-text-color);
                               border:none; cursor:pointer;"
                        onclick="navigator.clipboard.writeText(document.getElementById('copytext{i}').value)">
                    Copy
                </button>
            </td>
            <td>{model_id}</td>
        </tr>
        """)

    html = f"""
    <table border=1 style="border-collapse:collapse; width:100%; text-align:left;">
        <thead>
            <tr>
                <th style="padding:6px;">Filename</th>
                <th style="padding:6px;">File URL</th>
                <th style="padding:6px;">Repo ID</th>
            </tr>
        </thead>
        <tbody>
            {''.join(rows)}
        </tbody>
    </table>
    """

    return html


response = requests.get(database_url, stream=True)
with open(database_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)

df = pd.read_csv(database_path)

df["FILENAME_NORM"] = df["FILENAME"].apply(normalize)
df["PARSED_URL"] = df["PARSED_URL"].apply(clean_file_url)

df = df.reset_index(drop=True)
df["orig_index"] = df.index

# Connect to SQLite
conn = sqlite3.connect(":memory:", check_same_thread=False)
df.to_sql("files", conn, index=False, if_exists="replace")

with gr.Blocks() as demo:
    gr.Markdown("## 🔍 RVC Voice Finder")
    query_input = gr.Textbox(label="Search here", placeholder="Hatsune Miku")
    button_query = gr.Button("Search")
    output = gr.HTML(label="Search Results")
    gr.Markdown(description)

    query_input.submit(search_files, inputs=query_input, outputs=output)
    button_query.click(search_files, inputs=query_input, outputs=output)

if __name__ == "__main__":
    demo.launch(debug=True, show_error=True)
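
One caveat about the committed code: search_files builds its SQL by interpolating the normalized keywords into the query text, so it leans on normalize() and the quote-escaping above to keep the string well formed. A parameterized variant is a sturdier option; the sketch below is hypothetical (not part of this commit), reuses normalize and conn from app.py, and drops the whole-word ranking for brevity.

import pandas as pd

def search_files_param(query: str) -> pd.DataFrame:
    # Hypothetical alternative to search_files: bind the LIKE patterns as
    # SQLite parameters instead of interpolating them into the SQL string.
    keywords = normalize(query).split()
    if not keywords:
        return pd.DataFrame()
    where = " AND ".join("FILENAME_NORM LIKE ?" for _ in keywords)
    params = [f"%{k}%" for k in keywords]
    sql = f"SELECT * FROM files WHERE {where} ORDER BY orig_index ASC;"
    return pd.read_sql(sql, conn, params=params)

pandas forwards params to the sqlite3 driver, so each ? placeholder is bound safely no matter what the user types. To try the app locally, install gradio, pandas, and requests, then run python app.py.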