Spaces:
Sleeping
Sleeping
Commit
·
4eaf9a0
1
Parent(s):
fc031cb
add search interface
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
#TODO: Quran results have numbers
|
| 2 |
-
|
| 3 |
import logging
|
|
|
|
|
|
|
| 4 |
logger = logging.getLogger(__name__)
|
| 5 |
logging.basicConfig(level=logging.INFO)
|
| 6 |
|
|
@@ -23,8 +23,8 @@ import re
|
|
| 23 |
import sqlite3
|
| 24 |
from collections import defaultdict
|
| 25 |
from typing import List, Tuple
|
| 26 |
-
import rich
|
| 27 |
-
from fuzzywuzzy import fuzz
|
| 28 |
import calendar
|
| 29 |
import translation_utils
|
| 30 |
import hashlib
|
|
@@ -33,9 +33,9 @@ translation_utils.create_translation_table()
|
|
| 33 |
|
| 34 |
# Create a translator instance *once* globally
|
| 35 |
translator = GoogleTranslator(source='auto', target='auto')
|
| 36 |
-
LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
|
| 37 |
|
| 38 |
-
LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED
|
| 39 |
|
| 40 |
# --- Constants ---
|
| 41 |
DATABASE_FILE = 'gematria.db'
|
|
@@ -49,7 +49,7 @@ def create_els_cache_table():
|
|
| 49 |
if not os.path.exists(ELS_CACHE_DB):
|
| 50 |
with sqlite3.connect(ELS_CACHE_DB) as conn:
|
| 51 |
conn.execute('''
|
| 52 |
-
CREATE TABLE els_cache (
|
| 53 |
query_hash TEXT PRIMARY KEY,
|
| 54 |
function_name TEXT,
|
| 55 |
args TEXT,
|
|
@@ -111,7 +111,6 @@ def get_query_hash(func, args, kwargs):
|
|
| 111 |
key = (func.__name__, args, kwargs)
|
| 112 |
return hashlib.sha256(json.dumps(key).encode()).hexdigest()
|
| 113 |
|
| 114 |
-
|
| 115 |
def cached_process_json_files(func, *args, **kwargs):
|
| 116 |
# Create a dictionary to store the parameters
|
| 117 |
params = {
|
|
@@ -154,15 +153,15 @@ def cached_process_json_files(func, *args, **kwargs):
|
|
| 154 |
try:
|
| 155 |
with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
|
| 156 |
cursor = conn.cursor()
|
| 157 |
-
cursor.execute(
|
| 158 |
-
|
|
|
|
| 159 |
conn.commit()
|
| 160 |
except sqlite3.Error as e:
|
| 161 |
logger.error(f"Database error caching results: {e}")
|
| 162 |
|
| 163 |
return results
|
| 164 |
|
| 165 |
-
|
| 166 |
# --- Helper Functions (from Network app.py) ---
|
| 167 |
def flatten_text(text: List) -> str:
|
| 168 |
if isinstance(text, list):
|
|
@@ -185,13 +184,13 @@ def get_most_frequent_phrase(results):
|
|
| 185 |
phrase_counts = defaultdict(int)
|
| 186 |
for words, book, chapter, verse, phrase_length, word_position in results:
|
| 187 |
phrase_counts[words] += 1
|
| 188 |
-
most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
|
| 189 |
return most_frequent_phrase
|
| 190 |
|
| 191 |
# --- Functions from BOS app.py ---
|
| 192 |
-
def create_language_dropdown(label, default_value='English', show_label=True):
|
| 193 |
return gr.Dropdown(
|
| 194 |
-
choices=list(LANGUAGE_CODE_MAP.keys()),
|
| 195 |
label=label,
|
| 196 |
value=default_value,
|
| 197 |
show_label=show_label
|
|
@@ -210,22 +209,21 @@ def calculate_gematria_sum(text, date_words):
|
|
| 210 |
else:
|
| 211 |
return None
|
| 212 |
|
| 213 |
-
def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
|
|
|
|
| 214 |
if step == 0 or rounds_combination == "0,0":
|
| 215 |
return None
|
| 216 |
|
| 217 |
results = {}
|
| 218 |
length = 0
|
| 219 |
|
| 220 |
-
selected_language_long = tlang
|
| 221 |
-
# Get the short code.
|
| 222 |
tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
|
| 223 |
-
if tlang is None:
|
| 224 |
tlang = "en"
|
| 225 |
logger.warning(
|
| 226 |
f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
|
| 227 |
|
| 228 |
-
# Cache Update: Pass parameters individually
|
| 229 |
if include_torah:
|
| 230 |
logger.debug(
|
| 231 |
f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
|
|
@@ -235,35 +233,37 @@ def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_b
|
|
| 235 |
results["Torah"] = []
|
| 236 |
|
| 237 |
if include_bible:
|
| 238 |
-
results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
|
|
|
|
| 239 |
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
| 240 |
else:
|
| 241 |
results["Bible"] = []
|
| 242 |
|
| 243 |
if include_quran:
|
| 244 |
-
results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
|
|
|
|
| 245 |
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
| 246 |
else:
|
| 247 |
results["Quran"] = []
|
| 248 |
|
| 249 |
if include_hindu:
|
| 250 |
results["Rig Veda"] = cached_process_json_files(
|
| 251 |
-
hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces,
|
|
|
|
| 252 |
else:
|
| 253 |
results["Rig Veda"] = []
|
| 254 |
|
| 255 |
if include_tripitaka:
|
| 256 |
results["Tripitaka"] = cached_process_json_files(
|
| 257 |
-
tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces,
|
|
|
|
| 258 |
else:
|
| 259 |
results["Tripitaka"] = []
|
| 260 |
|
| 261 |
return results
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
def add_24h_projection(results_dict): #Now takes a dictionary of results
|
| 266 |
-
for book_name, results in results_dict.items(): # Iterate per book
|
| 267 |
num_results = len(results)
|
| 268 |
if num_results > 0:
|
| 269 |
time_interval = timedelta(minutes=24 * 60 / num_results)
|
|
@@ -275,12 +275,11 @@ def add_24h_projection(results_dict): #Now takes a dictionary of results
|
|
| 275 |
current_time = next_time
|
| 276 |
return results_dict
|
| 277 |
|
| 278 |
-
|
| 279 |
def add_monthly_projection(results_dict, selected_date):
|
| 280 |
if selected_date is None:
|
| 281 |
-
return results_dict
|
| 282 |
|
| 283 |
-
for book_name, results in results_dict.items():
|
| 284 |
num_results = len(results)
|
| 285 |
if num_results > 0:
|
| 286 |
days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
|
|
@@ -289,23 +288,21 @@ def add_monthly_projection(results_dict, selected_date):
|
|
| 289 |
start_datetime = datetime(selected_date.year, selected_date.month, 1)
|
| 290 |
current_datetime = start_datetime
|
| 291 |
|
| 292 |
-
|
| 293 |
for i in range(num_results):
|
| 294 |
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
|
| 295 |
-
current_date = current_datetime.date()
|
| 296 |
next_date = next_datetime.date()
|
| 297 |
date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
|
| 298 |
results[i]['Monthly Projection'] = date_range_str
|
| 299 |
-
current_datetime = next_datetime
|
| 300 |
-
current_date = next_datetime.date()
|
| 301 |
return results_dict
|
| 302 |
|
| 303 |
-
|
| 304 |
-
def add_yearly_projection(results_dict, selected_date): #Correct name, handle dictionary input
|
| 305 |
if selected_date is None:
|
| 306 |
-
return results_dict
|
| 307 |
|
| 308 |
-
for book_name, results in results_dict.items():
|
| 309 |
num_results = len(results)
|
| 310 |
if num_results > 0:
|
| 311 |
days_in_year = 366 if calendar.isleap(selected_date.year) else 365
|
|
@@ -314,80 +311,293 @@ def add_yearly_projection(results_dict, selected_date): #Correct name, handle di
|
|
| 314 |
start_datetime = datetime(selected_date.year, 1, 1)
|
| 315 |
current_datetime = start_datetime
|
| 316 |
|
| 317 |
-
|
| 318 |
for i in range(num_results):
|
| 319 |
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
|
| 320 |
-
current_date = current_datetime.date()
|
| 321 |
next_date = next_datetime.date()
|
| 322 |
date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
|
| 323 |
results[i]['Yearly Projection'] = date_range_str
|
| 324 |
-
current_datetime = next_datetime
|
| 325 |
|
| 326 |
return results_dict
|
| 327 |
|
| 328 |
-
|
| 329 |
def sort_results(results):
|
| 330 |
def parse_time(time_str):
|
| 331 |
try:
|
| 332 |
hours, minutes = map(int, time_str.split(':'))
|
| 333 |
-
return hours * 60 + minutes
|
| 334 |
except ValueError:
|
| 335 |
-
return 24 * 60
|
| 336 |
|
| 337 |
return sorted(results, key=lambda x: (
|
| 338 |
-
parse_time(x.get('24h Projection', '23:59').split('-')[0]),
|
| 339 |
-
parse_time(x.get('24h Projection', '23:59').split('-')[1])
|
| 340 |
))
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
# --- Main Gradio App ---
|
| 343 |
with gr.Blocks() as app:
|
| 344 |
-
with gr.
|
| 345 |
-
with gr.
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
# --- Event Handlers ---
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
|
| 392 |
if selected_date is None:
|
| 393 |
return ""
|
|
@@ -408,10 +618,8 @@ with gr.Blocks() as app:
|
|
| 408 |
else: # Return empty string if no date components are selected
|
| 409 |
return ""
|
| 410 |
|
| 411 |
-
|
| 412 |
date_in_words = date_to_words(date_obj)
|
| 413 |
|
| 414 |
-
|
| 415 |
translator = GoogleTranslator(source='auto', target=date_language_input)
|
| 416 |
translated_date_words = translator.translate(date_in_words)
|
| 417 |
return custom_normalize(translated_date_words)
|
|
@@ -431,51 +639,50 @@ with gr.Blocks() as app:
|
|
| 431 |
new_step = math.ceil(float_step * 2)
|
| 432 |
return new_step, float_step * 2
|
| 433 |
|
| 434 |
-
|
| 435 |
def find_closest_phrase(target_phrase, phrases):
|
| 436 |
best_match = None
|
| 437 |
best_score = 0
|
| 438 |
|
| 439 |
-
logging.debug(f"Target phrase for similarity search: {target_phrase}")
|
| 440 |
|
| 441 |
for phrase, _, _, _, _, _ in phrases:
|
| 442 |
word_length_diff = abs(len(target_phrase.split()) - len(phrase.split()))
|
| 443 |
similarity_score = fuzz.ratio(target_phrase, phrase)
|
| 444 |
combined_score = similarity_score - word_length_diff
|
| 445 |
|
| 446 |
-
logging.debug(f"Comparing with phrase: {phrase}")
|
| 447 |
logging.debug(
|
| 448 |
-
f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")
|
| 449 |
|
| 450 |
if combined_score > best_score:
|
| 451 |
best_score = combined_score
|
| 452 |
best_match = phrase
|
| 453 |
|
| 454 |
-
logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")
|
| 455 |
return best_match
|
| 456 |
|
| 457 |
-
def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
|
| 458 |
-
|
|
|
|
| 459 |
els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
|
| 464 |
-
# --- Network Search Integration ---
|
| 465 |
most_frequent_phrases = {}
|
| 466 |
-
combined_and_sorted_results = []
|
| 467 |
|
| 468 |
for book_name, book_results in els_results.items():
|
| 469 |
-
if book_results:
|
| 470 |
-
most_frequent_phrases[book_name] = ""
|
| 471 |
|
| 472 |
for result in book_results:
|
| 473 |
try:
|
| 474 |
-
gematria_sum = calculate_gematria(result['result_text'])
|
| 475 |
max_words = len(result['result_text'].split())
|
| 476 |
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
| 477 |
max_words_limit = 20
|
| 478 |
-
while not matching_phrases and max_words < max_words_limit:
|
| 479 |
max_words += 1
|
| 480 |
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
| 481 |
|
|
@@ -484,13 +691,13 @@ with gr.Blocks() as app:
|
|
| 484 |
most_frequent_phrases[book_name] = most_frequent_phrase
|
| 485 |
else:
|
| 486 |
closest_phrase = find_closest_phrase(result['result_text'],
|
| 487 |
-
|
| 488 |
most_frequent_phrases[
|
| 489 |
-
book_name] = closest_phrase or ""
|
| 490 |
|
| 491 |
result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
|
| 492 |
if 'book' in result:
|
| 493 |
-
if isinstance(result['book'], int):
|
| 494 |
result['book'] = f"{book_name} {result['book']}."
|
| 495 |
combined_and_sorted_results.append(result)
|
| 496 |
|
|
@@ -498,49 +705,46 @@ with gr.Blocks() as app:
|
|
| 498 |
print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
|
| 499 |
continue
|
| 500 |
|
| 501 |
-
# --- Batch Translation ---
|
| 502 |
selected_language_long = tlang
|
| 503 |
tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
|
| 504 |
if tlang_short is None:
|
| 505 |
tlang_short = "en"
|
| 506 |
logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
|
| 507 |
|
| 508 |
-
# Prepare lists for batch translation, including source language
|
| 509 |
phrases_to_translate = []
|
| 510 |
-
phrases_source_langs = []
|
| 511 |
results_to_translate = []
|
| 512 |
-
results_source_langs = []
|
| 513 |
for result in combined_and_sorted_results:
|
| 514 |
phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
|
| 515 |
-
# Always use 'iw' as the source language for "Most Frequent Phrase"
|
| 516 |
phrases_source_langs.append("he")
|
| 517 |
results_to_translate.append(result.get('result_text', ''))
|
| 518 |
results_source_langs.append(result.get("source_language", "auto"))
|
| 519 |
|
| 520 |
-
translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short,
|
| 521 |
-
|
| 522 |
-
|
|
|
|
| 523 |
|
| 524 |
for i, result in enumerate(combined_and_sorted_results):
|
| 525 |
result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
|
| 526 |
result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
|
| 527 |
|
| 528 |
-
|
| 529 |
-
updated_els_results =
|
| 530 |
-
updated_els_results = add_monthly_projection(updated_els_results, selected_date) # Call correct functions with correct params
|
| 531 |
updated_els_results = add_yearly_projection(updated_els_results, selected_date)
|
| 532 |
|
| 533 |
combined_and_sorted_results = []
|
| 534 |
-
for book_results in updated_els_results.values():
|
| 535 |
combined_and_sorted_results.extend(book_results)
|
| 536 |
-
combined_and_sorted_results = sort_results(combined_and_sorted_results)
|
| 537 |
|
| 538 |
df = pd.DataFrame(combined_and_sorted_results)
|
| 539 |
df.index = range(1, len(df) + 1)
|
| 540 |
df.reset_index(inplace=True)
|
| 541 |
df.rename(columns={'index': 'Result Number'}, inplace=True)
|
| 542 |
|
| 543 |
-
for i, result in enumerate(combined_and_sorted_results):
|
| 544 |
result['Result Number'] = i + 1
|
| 545 |
|
| 546 |
search_config = {
|
|
@@ -561,40 +765,44 @@ with gr.Blocks() as app:
|
|
| 561 |
|
| 562 |
output_data = {
|
| 563 |
"search_configuration": search_config,
|
| 564 |
-
"results": combined_and_sorted_results
|
| 565 |
}
|
| 566 |
|
| 567 |
json_data = output_data
|
| 568 |
|
| 569 |
-
# --- Return results ---
|
| 570 |
combined_most_frequent = "\n".join(
|
| 571 |
-
f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())
|
| 572 |
return df, combined_most_frequent, json_data
|
| 573 |
|
| 574 |
-
|
| 575 |
-
|
| 576 |
# --- Event Triggers ---
|
| 577 |
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
| 578 |
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
| 579 |
|
| 580 |
-
selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year],
|
| 581 |
-
|
|
|
|
|
|
|
|
|
|
| 582 |
|
| 583 |
-
gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
|
| 584 |
-
|
|
|
|
|
|
|
| 585 |
|
| 586 |
half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
|
| 587 |
double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
|
| 588 |
|
| 589 |
translate_btn.click(
|
| 590 |
perform_search,
|
| 591 |
-
inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk,
|
|
|
|
|
|
|
| 592 |
outputs=[markdown_output, most_frequent_phrase_output, json_output]
|
| 593 |
)
|
| 594 |
|
| 595 |
app.load(
|
| 596 |
update_date_words,
|
| 597 |
-
inputs=[selected_date, date_language_input, use_day, use_month, use_year],
|
| 598 |
outputs=[date_words_output]
|
| 599 |
)
|
| 600 |
|
|
@@ -615,14 +823,13 @@ with gr.Blocks() as app:
|
|
| 615 |
)
|
| 616 |
|
| 617 |
def checkbox_behavior(use_day_value, use_month_value):
|
| 618 |
-
if use_day_value:
|
| 619 |
return True, True
|
| 620 |
|
| 621 |
-
return use_month_value, True
|
| 622 |
|
| 623 |
use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
|
| 624 |
-
use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
|
| 625 |
-
|
| 626 |
|
| 627 |
if __name__ == "__main__":
|
| 628 |
app.launch(share=False)
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
logger = logging.getLogger(__name__)
|
| 5 |
logging.basicConfig(level=logging.INFO)
|
| 6 |
|
|
|
|
| 23 |
import sqlite3
|
| 24 |
from collections import defaultdict
|
| 25 |
from typing import List, Tuple
|
| 26 |
+
# import rich # Removed rich
|
| 27 |
+
# from fuzzywuzzy import fuzz # Removed fuzzywuzzy
|
| 28 |
import calendar
|
| 29 |
import translation_utils
|
| 30 |
import hashlib
|
|
|
|
| 33 |
|
| 34 |
# Create a translator instance *once* globally
|
| 35 |
translator = GoogleTranslator(source='auto', target='auto')
|
| 36 |
+
LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
|
| 37 |
|
| 38 |
+
LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED # Use deep_translator's mapping directly
|
| 39 |
|
| 40 |
# --- Constants ---
|
| 41 |
DATABASE_FILE = 'gematria.db'
|
|
|
|
| 49 |
if not os.path.exists(ELS_CACHE_DB):
|
| 50 |
with sqlite3.connect(ELS_CACHE_DB) as conn:
|
| 51 |
conn.execute('''
|
| 52 |
+
CREATE TABLE IF NOT EXISTS els_cache (
|
| 53 |
query_hash TEXT PRIMARY KEY,
|
| 54 |
function_name TEXT,
|
| 55 |
args TEXT,
|
|
|
|
| 111 |
key = (func.__name__, args, kwargs)
|
| 112 |
return hashlib.sha256(json.dumps(key).encode()).hexdigest()
|
| 113 |
|
|
|
|
| 114 |
def cached_process_json_files(func, *args, **kwargs):
|
| 115 |
# Create a dictionary to store the parameters
|
| 116 |
params = {
|
|
|
|
| 153 |
try:
|
| 154 |
with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
|
| 155 |
cursor = conn.cursor()
|
| 156 |
+
cursor.execute(
|
| 157 |
+
"INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
|
| 158 |
+
(query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
|
| 159 |
conn.commit()
|
| 160 |
except sqlite3.Error as e:
|
| 161 |
logger.error(f"Database error caching results: {e}")
|
| 162 |
|
| 163 |
return results
|
| 164 |
|
|
|
|
| 165 |
# --- Helper Functions (from Network app.py) ---
|
| 166 |
def flatten_text(text: List) -> str:
|
| 167 |
if isinstance(text, list):
|
|
|
|
| 184 |
phrase_counts = defaultdict(int)
|
| 185 |
for words, book, chapter, verse, phrase_length, word_position in results:
|
| 186 |
phrase_counts[words] += 1
|
| 187 |
+
most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
|
| 188 |
return most_frequent_phrase
|
| 189 |
|
| 190 |
# --- Functions from BOS app.py ---
|
| 191 |
+
def create_language_dropdown(label, default_value='English', show_label=True):
|
| 192 |
return gr.Dropdown(
|
| 193 |
+
choices=list(LANGUAGE_CODE_MAP.keys()),
|
| 194 |
label=label,
|
| 195 |
value=default_value,
|
| 196 |
show_label=show_label
|
|
|
|
| 209 |
else:
|
| 210 |
return None
|
| 211 |
|
| 212 |
+
def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
|
| 213 |
+
include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
|
| 214 |
if step == 0 or rounds_combination == "0,0":
|
| 215 |
return None
|
| 216 |
|
| 217 |
results = {}
|
| 218 |
length = 0
|
| 219 |
|
| 220 |
+
selected_language_long = tlang
|
|
|
|
| 221 |
tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
|
| 222 |
+
if tlang is None:
|
| 223 |
tlang = "en"
|
| 224 |
logger.warning(
|
| 225 |
f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
|
| 226 |
|
|
|
|
| 227 |
if include_torah:
|
| 228 |
logger.debug(
|
| 229 |
f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
|
|
|
|
| 233 |
results["Torah"] = []
|
| 234 |
|
| 235 |
if include_bible:
|
| 236 |
+
results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
|
| 237 |
+
length,
|
| 238 |
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
| 239 |
else:
|
| 240 |
results["Bible"] = []
|
| 241 |
|
| 242 |
if include_quran:
|
| 243 |
+
results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
|
| 244 |
+
length,
|
| 245 |
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
| 246 |
else:
|
| 247 |
results["Quran"] = []
|
| 248 |
|
| 249 |
if include_hindu:
|
| 250 |
results["Rig Veda"] = cached_process_json_files(
|
| 251 |
+
hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces,
|
| 252 |
+
strip_diacritics_chk)
|
| 253 |
else:
|
| 254 |
results["Rig Veda"] = []
|
| 255 |
|
| 256 |
if include_tripitaka:
|
| 257 |
results["Tripitaka"] = cached_process_json_files(
|
| 258 |
+
tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces,
|
| 259 |
+
strip_in_braces, strip_diacritics_chk)
|
| 260 |
else:
|
| 261 |
results["Tripitaka"] = []
|
| 262 |
|
| 263 |
return results
|
| 264 |
|
| 265 |
+
def add_24h_projection(results_dict):
|
| 266 |
+
for book_name, results in results_dict.items():
|
|
|
|
|
|
|
| 267 |
num_results = len(results)
|
| 268 |
if num_results > 0:
|
| 269 |
time_interval = timedelta(minutes=24 * 60 / num_results)
|
|
|
|
| 275 |
current_time = next_time
|
| 276 |
return results_dict
|
| 277 |
|
|
|
|
| 278 |
def add_monthly_projection(results_dict, selected_date):
|
| 279 |
if selected_date is None:
|
| 280 |
+
return results_dict
|
| 281 |
|
| 282 |
+
for book_name, results in results_dict.items():
|
| 283 |
num_results = len(results)
|
| 284 |
if num_results > 0:
|
| 285 |
days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
|
|
|
|
| 288 |
start_datetime = datetime(selected_date.year, selected_date.month, 1)
|
| 289 |
current_datetime = start_datetime
|
| 290 |
|
|
|
|
| 291 |
for i in range(num_results):
|
| 292 |
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
|
| 293 |
+
current_date = current_datetime.date()
|
| 294 |
next_date = next_datetime.date()
|
| 295 |
date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
|
| 296 |
results[i]['Monthly Projection'] = date_range_str
|
| 297 |
+
current_datetime = next_datetime
|
| 298 |
+
current_date = next_datetime.date()
|
| 299 |
return results_dict
|
| 300 |
|
| 301 |
+
def add_yearly_projection(results_dict, selected_date):
|
|
|
|
| 302 |
if selected_date is None:
|
| 303 |
+
return results_dict
|
| 304 |
|
| 305 |
+
for book_name, results in results_dict.items():
|
| 306 |
num_results = len(results)
|
| 307 |
if num_results > 0:
|
| 308 |
days_in_year = 366 if calendar.isleap(selected_date.year) else 365
|
|
|
|
| 311 |
start_datetime = datetime(selected_date.year, 1, 1)
|
| 312 |
current_datetime = start_datetime
|
| 313 |
|
|
|
|
| 314 |
for i in range(num_results):
|
| 315 |
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
|
| 316 |
+
current_date = current_datetime.date()
|
| 317 |
next_date = next_datetime.date()
|
| 318 |
date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
|
| 319 |
results[i]['Yearly Projection'] = date_range_str
|
| 320 |
+
current_datetime = next_datetime
|
| 321 |
|
| 322 |
return results_dict
|
| 323 |
|
|
|
|
| 324 |
def sort_results(results):
|
| 325 |
def parse_time(time_str):
|
| 326 |
try:
|
| 327 |
hours, minutes = map(int, time_str.split(':'))
|
| 328 |
+
return hours * 60 + minutes
|
| 329 |
except ValueError:
|
| 330 |
+
return 24 * 60
|
| 331 |
|
| 332 |
return sorted(results, key=lambda x: (
|
| 333 |
+
parse_time(x.get('24h Projection', '23:59').split('-')[0]),
|
| 334 |
+
parse_time(x.get('24h Projection', '23:59').split('-')[1])
|
| 335 |
))
|
| 336 |
|
| 337 |
+
def extract_rounds_combinations():
|
| 338 |
+
"""Extracts unique rounds combinations from the database."""
|
| 339 |
+
combinations = set()
|
| 340 |
+
try:
|
| 341 |
+
with sqlite3.connect(ELS_CACHE_DB) as conn:
|
| 342 |
+
cursor = conn.cursor()
|
| 343 |
+
cursor.execute("SELECT args FROM els_cache")
|
| 344 |
+
all_args = cursor.fetchall()
|
| 345 |
+
for args_tuple in all_args:
|
| 346 |
+
args_str = args_tuple[0]
|
| 347 |
+
try:
|
| 348 |
+
args_json = json.loads(args_str)
|
| 349 |
+
if 'rounds' in args_json:
|
| 350 |
+
combinations.add(args_json['rounds'])
|
| 351 |
+
except json.JSONDecodeError:
|
| 352 |
+
logger.error(f"Could not decode JSON for args: {args_str}")
|
| 353 |
+
except sqlite3.Error as e:
|
| 354 |
+
logger.error(f"Database error: {e}")
|
| 355 |
+
logger.info(f"Found unique rounds combinations: {combinations}")
|
| 356 |
+
return ["All"] + sorted(list(combinations))
|
| 357 |
+
|
| 358 |
+
def update_rounds_dropdown():
|
| 359 |
+
new_choices = extract_rounds_combinations()
|
| 360 |
+
return new_choices
|
| 361 |
+
|
| 362 |
# --- Main Gradio App ---
|
| 363 |
with gr.Blocks() as app:
|
| 364 |
+
with gr.Tab("ELS Search"):
|
| 365 |
+
with gr.Column():
|
| 366 |
+
with gr.Row():
|
| 367 |
+
tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
|
| 368 |
+
selected_date = Calendar(type="datetime", label="Date to investigate (optional)",
|
| 369 |
+
info="Pick a date from the calendar")
|
| 370 |
+
use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
|
| 371 |
+
use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
|
| 372 |
+
use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
|
| 373 |
+
date_language_input = create_language_dropdown(
|
| 374 |
+
"Language of the person/topic (optional) (Date Word Language)", default_value='english')
|
| 375 |
+
with gr.Row():
|
| 376 |
+
gematria_text = gr.Textbox(label="Name and/or Topic (required)",
|
| 377 |
+
value="Hans Albert Einstein Mileva Marity-Einstein")
|
| 378 |
+
date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
|
| 379 |
+
gematria_result = gr.Number(label="Journal Sum")
|
| 380 |
+
# with gr.Row():
|
| 381 |
+
|
| 382 |
+
with gr.Row():
|
| 383 |
+
step = gr.Number(label="Jump Width (Steps) for ELS")
|
| 384 |
+
float_step = gr.Number(visible=False, value=1)
|
| 385 |
+
half_step_btn = gr.Button("Steps / 2")
|
| 386 |
+
double_step_btn = gr.Button("Steps * 2")
|
| 387 |
+
|
| 388 |
+
with gr.Column():
|
| 389 |
+
round_x = gr.Number(label="Round (1)", value=1)
|
| 390 |
+
round_y = gr.Number(label="Round (2)", value=-1)
|
| 391 |
+
|
| 392 |
+
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
|
| 393 |
+
|
| 394 |
+
with gr.Row():
|
| 395 |
+
include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
|
| 396 |
+
include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
|
| 397 |
+
include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
|
| 398 |
+
include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
|
| 399 |
+
include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
|
| 400 |
+
|
| 401 |
+
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
|
| 402 |
+
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
|
| 403 |
+
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
|
| 404 |
+
|
| 405 |
+
translate_btn = gr.Button("Search with ELS")
|
| 406 |
+
|
| 407 |
+
# --- Output Components ---
|
| 408 |
+
markdown_output = gr.Dataframe(label="ELS Results")
|
| 409 |
+
most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
|
| 410 |
+
json_output = gr.JSON(label="JSON Output")
|
| 411 |
+
|
| 412 |
+
with gr.Tab("Cache Database Search"):
|
| 413 |
+
with gr.Column():
|
| 414 |
+
with gr.Row():
|
| 415 |
+
main_book_filter = gr.Dropdown(label="Filter by Main Book",
|
| 416 |
+
choices=["All", "Torah", "Bible", "Quran", "Rig Veda", "Tripitaka"],
|
| 417 |
+
value="All")
|
| 418 |
+
# Keine choices hier, nur das Label und den Initialwert
|
| 419 |
+
rounds_filter = gr.Dropdown(label="Filter by Rounds", value="All")
|
| 420 |
+
|
| 421 |
+
with gr.Row():
|
| 422 |
+
search_type = gr.Radio(label="Search by",
|
| 423 |
+
choices=["Text in result_text", "Gematria Sum in results"],
|
| 424 |
+
value="Text in result_text")
|
| 425 |
+
with gr.Row():
|
| 426 |
+
search_term = gr.Textbox(label="Search Term", visible=True)
|
| 427 |
+
gematria_sum_search = gr.Number(label="Gematria Sum", visible=False)
|
| 428 |
+
|
| 429 |
+
with gr.Row():
|
| 430 |
+
search_db_btn = gr.Button("Search Cache Database")
|
| 431 |
+
with gr.Row():
|
| 432 |
+
cache_search_results = gr.JSON(label="Cache Search Results")
|
| 433 |
+
|
| 434 |
+
def update_search_components(search_type):
|
| 435 |
+
if search_type == "Text in result_text":
|
| 436 |
+
return gr.Textbox.update(visible=True), gr.Number.update(visible=False)
|
| 437 |
+
else:
|
| 438 |
+
return gr.Textbox.update(visible=False), gr.Number.update(visible=True)
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
def search_cache_database(search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter):
    """Searches the cache database based on the selected filters and search term.

    Args:
        search_type: "Text in result_text" or "Gematria Sum in results".
        search_term: substring to look for in each cached result's result_text.
        gematria_sum_search: numeric sum to match against result_sum.
        main_book_filter: "All" or one of the main book names.
        rounds_filter: "All" or a specific rounds value.

    Returns:
        A list of matching entries sorted by gematria sum, or [] on DB error.
    """
    # Maps the UI book filter to the function_name stored in cache rows.
    book_to_function = {
        "Torah": "torah.process_json_files",
        "Bible": "bible.process_json_files",
        "Quran": "quran.process_json_files",
        "Rig Veda": "hindu.process_json_files",
        "Tripitaka": "tripitaka.process_json_files",
    }

    results = []
    # Nothing to search for at all: avoid a full table scan.
    if main_book_filter == "All" and rounds_filter == "All" and not search_term and not gematria_sum_search:
        return results

    is_text_search = search_type == "Text in result_text"

    # Optimization: if only the main-book filter is selected, don't perform
    # a full search (cheap guards run before opening the DB connection).
    if is_text_search:
        if main_book_filter != "All" and rounds_filter == "All" and not search_term:
            return results
    else:
        if main_book_filter != "All" and rounds_filter == "All" and not gematria_sum_search:
            return results
        if not isinstance(gematria_sum_search, (int, float)):
            return results

    try:
        with sqlite3.connect(ELS_CACHE_DB) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM els_cache")
            all_results = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]

            for row in all_results:
                row_dict = dict(zip(columns, row))
                args_dict = json.loads(row_dict['args'])
                function_name = row_dict['function_name']

                # Function-name (book) filtering.
                if main_book_filter != "All" and function_name != book_to_function.get(main_book_filter):
                    continue

                # Rounds filtering.
                if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
                    continue

                try:
                    results_json = json.loads(row_dict['results'])
                    for result_entry in results_json:
                        if is_text_search:
                            matched = ('result_text' in result_entry
                                       and search_term in result_entry['result_text'])
                        else:
                            matched = ('result_sum' in result_entry
                                       and result_entry['result_sum'] == gematria_sum_search)
                        if matched:
                            results.append({
                                'function_name': function_name,
                                'step': args_dict.get('step'),
                                'rounds': args_dict.get('rounds'),
                                'result': result_entry,
                            })
                except (json.JSONDecodeError, TypeError) as e:
                    logger.error(f"Error processing row: {e}")
                    continue

        # Sort results by gematria sum.
        results.sort(
            key=lambda x: x['result']['result_sum'] if 'result' in x and 'result_sum' in x['result'] else 0)
        return results

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return []
|
| 566 |
+
|
| 567 |
+
def update_search_components(search_type):
    """Updates the visibility of the search term and gematria sum input fields."""
    # Raw update dicts (the payload gr.update() would build): show exactly one
    # of the two inputs depending on the chosen search mode.
    text_mode = search_type == "Text in result_text"
    term_update = {"visible": text_mode, "__type__": "update"}
    sum_update = {"visible": not text_mode, "__type__": "update"}
    return term_update, sum_update
|
| 573 |
|
| 574 |
# --- Event Handlers ---
|
| 575 |
|
| 576 |
+
search_type.change(
|
| 577 |
+
fn=update_search_components,
|
| 578 |
+
inputs=[search_type],
|
| 579 |
+
outputs=[search_term, gematria_sum_search]
|
| 580 |
+
)
|
| 581 |
+
|
| 582 |
+
search_db_btn.click(
|
| 583 |
+
fn=search_cache_database,
|
| 584 |
+
inputs=[search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter],
|
| 585 |
+
outputs=cache_search_results
|
| 586 |
+
)
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
def update_rounds_choices():
    """Refresh the rounds dropdown with the combinations currently cached."""
    # Use gr.update, not gr.Dropdown.update (version-stable API).
    combos = extract_rounds_combinations()
    return gr.update(choices=combos)
|
| 591 |
+
|
| 592 |
+
app.load(fn=update_rounds_choices, inputs=None, outputs=rounds_filter)
|
| 593 |
+
|
| 594 |
+
main_book_filter.change(
|
| 595 |
+
fn=update_rounds_choices,
|
| 596 |
+
inputs=None, # No input needed here
|
| 597 |
+
outputs=rounds_filter
|
| 598 |
+
)
|
| 599 |
+
|
| 600 |
+
# rest of the handlers
|
| 601 |
def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
|
| 602 |
if selected_date is None:
|
| 603 |
return ""
|
|
|
|
| 618 |
else: # Return empty string if no date components are selected
|
| 619 |
return ""
|
| 620 |
|
|
|
|
| 621 |
date_in_words = date_to_words(date_obj)
|
| 622 |
|
|
|
|
| 623 |
translator = GoogleTranslator(source='auto', target=date_language_input)
|
| 624 |
translated_date_words = translator.translate(date_in_words)
|
| 625 |
return custom_normalize(translated_date_words)
|
|
|
|
| 639 |
new_step = math.ceil(float_step * 2)
|
| 640 |
return new_step, float_step * 2
|
| 641 |
|
|
|
|
| 642 |
def find_closest_phrase(target_phrase, phrases):
    """Return the candidate phrase most similar to *target_phrase*.

    Each candidate is scored as its fuzz.ratio similarity minus the absolute
    difference in word counts; only a strictly positive score can win, so the
    function returns None when no candidate beats the initial score of 0.
    """
    winner = None
    top_score = 0

    logging.debug(f"Target phrase for similarity search: {target_phrase}")

    target_words = len(target_phrase.split())
    for phrase, _, _, _, _, _ in phrases:
        word_length_diff = abs(target_words - len(phrase.split()))
        similarity_score = fuzz.ratio(target_phrase, phrase)
        combined_score = similarity_score - word_length_diff

        logging.debug(f"Comparing with phrase: {phrase}")
        logging.debug(
            f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")

        if combined_score > top_score:
            top_score, winner = combined_score, phrase

    logging.debug(f"Closest phrase found: {winner} with score: {top_score}")
    return winner
|
| 663 |
|
| 664 |
+
def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
|
| 665 |
+
include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text,
|
| 666 |
+
date_words_output, selected_date):
|
| 667 |
els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
|
| 668 |
+
strip_diacritics_chk, include_torah, include_bible, include_quran,
|
| 669 |
+
include_hindu,
|
| 670 |
+
include_tripitaka)
|
| 671 |
|
|
|
|
| 672 |
most_frequent_phrases = {}
|
| 673 |
+
combined_and_sorted_results = []
|
| 674 |
|
| 675 |
for book_name, book_results in els_results.items():
|
| 676 |
+
if book_results:
|
| 677 |
+
most_frequent_phrases[book_name] = ""
|
| 678 |
|
| 679 |
for result in book_results:
|
| 680 |
try:
|
| 681 |
+
gematria_sum = calculate_gematria(result['result_text'])
|
| 682 |
max_words = len(result['result_text'].split())
|
| 683 |
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
| 684 |
max_words_limit = 20
|
| 685 |
+
while not matching_phrases and max_words < max_words_limit:
|
| 686 |
max_words += 1
|
| 687 |
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
| 688 |
|
|
|
|
| 691 |
most_frequent_phrases[book_name] = most_frequent_phrase
|
| 692 |
else:
|
| 693 |
closest_phrase = find_closest_phrase(result['result_text'],
|
| 694 |
+
search_gematria_in_db(gematria_sum, max_words_limit))
|
| 695 |
most_frequent_phrases[
|
| 696 |
+
book_name] = closest_phrase or ""
|
| 697 |
|
| 698 |
result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
|
| 699 |
if 'book' in result:
|
| 700 |
+
if isinstance(result['book'], int):
|
| 701 |
result['book'] = f"{book_name} {result['book']}."
|
| 702 |
combined_and_sorted_results.append(result)
|
| 703 |
|
|
|
|
| 705 |
print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
|
| 706 |
continue
|
| 707 |
|
|
|
|
| 708 |
selected_language_long = tlang
|
| 709 |
tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
|
| 710 |
if tlang_short is None:
|
| 711 |
tlang_short = "en"
|
| 712 |
logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
|
| 713 |
|
|
|
|
| 714 |
phrases_to_translate = []
|
| 715 |
+
phrases_source_langs = []
|
| 716 |
results_to_translate = []
|
| 717 |
+
results_source_langs = []
|
| 718 |
for result in combined_and_sorted_results:
|
| 719 |
phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
|
|
|
|
| 720 |
phrases_source_langs.append("he")
|
| 721 |
results_to_translate.append(result.get('result_text', ''))
|
| 722 |
results_source_langs.append(result.get("source_language", "auto"))
|
| 723 |
|
| 724 |
+
translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short,
|
| 725 |
+
phrases_source_langs)
|
| 726 |
+
translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short,
|
| 727 |
+
results_source_langs)
|
| 728 |
|
| 729 |
for i, result in enumerate(combined_and_sorted_results):
|
| 730 |
result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
|
| 731 |
result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
|
| 732 |
|
| 733 |
+
updated_els_results = add_24h_projection(els_results)
|
| 734 |
+
updated_els_results = add_monthly_projection(updated_els_results, selected_date)
|
|
|
|
| 735 |
updated_els_results = add_yearly_projection(updated_els_results, selected_date)
|
| 736 |
|
| 737 |
combined_and_sorted_results = []
|
| 738 |
+
for book_results in updated_els_results.values():
|
| 739 |
combined_and_sorted_results.extend(book_results)
|
| 740 |
+
combined_and_sorted_results = sort_results(combined_and_sorted_results)
|
| 741 |
|
| 742 |
df = pd.DataFrame(combined_and_sorted_results)
|
| 743 |
df.index = range(1, len(df) + 1)
|
| 744 |
df.reset_index(inplace=True)
|
| 745 |
df.rename(columns={'index': 'Result Number'}, inplace=True)
|
| 746 |
|
| 747 |
+
for i, result in enumerate(combined_and_sorted_results):
|
| 748 |
result['Result Number'] = i + 1
|
| 749 |
|
| 750 |
search_config = {
|
|
|
|
| 765 |
|
| 766 |
output_data = {
|
| 767 |
"search_configuration": search_config,
|
| 768 |
+
"results": combined_and_sorted_results
|
| 769 |
}
|
| 770 |
|
| 771 |
json_data = output_data
|
| 772 |
|
|
|
|
| 773 |
combined_most_frequent = "\n".join(
|
| 774 |
+
f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())
|
| 775 |
return df, combined_most_frequent, json_data
|
| 776 |
|
|
|
|
|
|
|
| 777 |
# --- Event Triggers ---
|
| 778 |
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
| 779 |
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
| 780 |
|
| 781 |
+
selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year],
|
| 782 |
+
outputs=[date_words_output])
|
| 783 |
+
date_language_input.change(update_date_words,
|
| 784 |
+
inputs=[selected_date, date_language_input, use_day, use_month, use_year],
|
| 785 |
+
outputs=[date_words_output])
|
| 786 |
|
| 787 |
+
gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
|
| 788 |
+
outputs=[gematria_result, step, float_step])
|
| 789 |
+
date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output],
|
| 790 |
+
outputs=[gematria_result, step, float_step])
|
| 791 |
|
| 792 |
half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
|
| 793 |
double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
|
| 794 |
|
| 795 |
translate_btn.click(
|
| 796 |
perform_search,
|
| 797 |
+
inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk,
|
| 798 |
+
include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text,
|
| 799 |
+
date_words_output, selected_date],
|
| 800 |
outputs=[markdown_output, most_frequent_phrase_output, json_output]
|
| 801 |
)
|
| 802 |
|
| 803 |
app.load(
|
| 804 |
update_date_words,
|
| 805 |
+
inputs=[selected_date, date_language_input, use_day, use_month, use_year],
|
| 806 |
outputs=[date_words_output]
|
| 807 |
)
|
| 808 |
|
|
|
|
| 823 |
)
|
| 824 |
|
| 825 |
def checkbox_behavior(use_day_value, use_month_value):
    """Keep the date-granularity checkboxes consistent.

    Using the day forces the month on; using either forces the year on.
    Returns the new (use_month, use_year) pair.
    """
    return (True, True) if use_day_value else (use_month_value, True)
|
| 830 |
|
| 831 |
use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
|
| 832 |
+
use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
|
|
|
|
| 833 |
|
| 834 |
if __name__ == "__main__":
    # Launch the Gradio app locally; share=False keeps it off a public link.
    app.launch(share=False)
|