Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import logging | |
| import sqlite3 | |
| import re | |
| from typing import Dict, List, Any | |
| from gematria import calculate_gematria, strip_diacritics | |
| from deep_translator import GoogleTranslator | |
| logger = logging.getLogger(__name__) | |
def process_bible_files(start: int, end: int, base_path: str = "texts/bible") -> Dict[int, Dict[str, Any]]:
    """
    Loads Bible book JSON files and returns their contents keyed by book ID.

    Each book is read from ``{base_path}/{book_id}.json``. Loading is best-effort:
    a file that is missing, unreadable, or not valid JSON is skipped with a logged
    warning, and a file whose parsed content is empty is skipped silently.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive). If ``end < start`` nothing is loaded.
        base_path: Directory containing the ``<book_id>.json`` files.

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' (default "No title") and 'text' (default ``[]``) fields.
        Books that could not be loaded are simply absent from the result.
    """
    results: Dict[int, Dict[str, Any]] = {}
    for book_id in range(start, end + 1):
        file_name = f"{base_path}/{book_id}.json"

        # Keep the try body limited to the I/O + parse step that can actually fail.
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            logger.warning("File %s not found.", file_name)
            continue
        except json.JSONDecodeError as e:
            logger.warning("File %s could not be read as JSON: %s", file_name, e)
            continue
        except Exception as e:
            # Deliberately broad: one bad file must not abort loading the others.
            logger.warning("Error processing %s: %s", file_name, e)
            continue

        if not data:
            # Empty object/array/null: nothing to store for this book.
            continue
        if not isinstance(data, dict):
            logger.warning(
                "Error processing %s: expected a JSON object, got %s",
                file_name, type(data).__name__,
            )
            continue

        # Store book ID as key and the extracted title/verses as value.
        results[book_id] = {
            "title": data.get("title", "No title"),
            "text": data.get("text", []),
        }
    return results
def _flatten_and_clean_text(chapters, strip_spaces, strip_in_braces, strip_diacritics_value):
    """Joins all non-empty verses into one lowercase string and applies the requested clean-ups."""
    # Single join instead of repeated += (which is quadratic on large books).
    flattened_text = "".join(verse + " " for chapter in chapters for verse in chapter if verse)
    processed_text = flattened_text.lower()
    if strip_in_braces:
        # Remove content within brackets or parentheses
        processed_text = re.sub(r'\[.*?\]|\(.*?\)', '', processed_text)
    if strip_diacritics_value:
        processed_text = strip_diacritics(processed_text)
    if strip_spaces:
        processed_text = processed_text.replace(" ", "")
    return processed_text


def _extract_els(text, stride, backward=False, length=0):
    """Returns every ``stride``-th character of ``text`` (from the end if ``backward``), capped at ``length`` if > 0."""
    picked = text[::-stride] if backward else text[::stride]
    return picked[:length] if length > 0 else picked


def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True,
                       strip_diacritics_value=True, translate=False):
    """
    Processes Bible JSON files and performs ELS (Equidistant Letter Sequence) extraction.

    For every loaded book the verses are flattened into one lowercase string, cleaned
    according to the ``strip_*`` flags, and then sampled once per round: every
    ``abs(round * step)``-th character, starting at the first character for positive
    rounds and at the last character (walking backwards) for negative rounds.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Step size for character selection. Must be non-zero.
    - rounds (str | int): Comma-separated list of round numbers (can include negative
      values); a bare int is accepted too. Blank entries are ignored and rounds equal
      to 0 are skipped.
    - length (int): Maximum length of the result text (0 means no limit).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within [] or ().
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text. No translation is
      attempted when ``tlang`` is "en" or the result text is empty.

    Returns:
    - list | None: A list of dictionaries. Successful entries have the keys
      'book_id', 'book_title', 'step', 'round', 'result_text', 'translated_text' and
      'gematria'; failures are reported as ``{"error": ...}`` entries instead of being
      raised. ``None`` is returned when nothing at all was produced (e.g. every round
      was 0).
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []
    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]

        # str() lets callers pass a plain int; blank tokens (e.g. a trailing comma) are ignored.
        rounds_list = [int(token) for token in str(rounds).split(",") if token.strip()]
        active_rounds = [round_num for round_num in rounds_list if round_num != 0]

        # Fail with a clear message instead of a cryptic "step cannot be zero" from slicing.
        if active_rounds and step == 0:
            raise ValueError("step must be a non-zero integer")

        # Created lazily (inside the per-result try) and then reused for all rounds/books.
        translator = None

        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])
            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue

            processed_text = _flatten_and_clean_text(
                chapters, strip_spaces, strip_in_braces, strip_diacritics_value
            )

            # Perform ELS extraction for each round
            for round_num in active_rounds:
                result_text = _extract_els(
                    processed_text,
                    abs(round_num * step),
                    backward=round_num < 0,
                    length=length,
                )

                # Translate if requested
                translated_text = ""
                if result_text and translate and tlang != "en":
                    try:
                        if translator is None:
                            translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        # Translation is best-effort; report the failure in the result itself.
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"

                results.append({
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                })
    except Exception as e:
        # Top-level boundary: callers receive an error entry rather than an exception.
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})
    return results or None
| # This function is not needed anymore as we're using get_first_els_result_matthew from app.py | |
| # Keeping the definition for compatibility but marking it as deprecated | |
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Compatibility shim kept so existing callers do not break. Despite its name it
    does no John-specific work: it logs a deprecation warning and forwards both
    arguments unchanged to ``app.get_first_els_result_matthew``.

    Args:
        gematria_sum: Passed through to ``get_first_els_result_matthew``.
        tlang: Passed through to ``get_first_els_result_matthew``.

    Returns:
        Whatever ``get_first_els_result_matthew(gematria_sum, tlang)`` returns.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import with app.py - confirm.
    from app import get_first_els_result_matthew
    return get_first_els_result_matthew(gematria_sum, tlang)
def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """
    Creates an iframe HTML string for BibleGateway.

    The passage reference is built as ``<book>+<chapter>:<verse>`` and every part is
    URL-encoded, so the value cannot break out of the query string or the surrounding
    HTML attribute.

    Args:
        book_title: Book name used in the BibleGateway search (must be a string).
        book_id: Numeric book ID; only used for debug logging.
        chapter: Optional chapter. When omitted the whole book is referenced.
        verse: Optional verse. Only used when ``chapter`` is also given, since a verse
            reference without a chapter is not meaningful.

    Returns:
        An ``<iframe>`` element (800x600) pointing at the CJB version of the passage.
    """
    from urllib.parse import quote_plus

    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")

    search = quote_plus(book_title)
    if chapter is not None:
        # "+" is the URL-encoded space between book name and chapter.
        search += f"+{quote_plus(str(chapter))}"
        if verse is not None:
            search += f":{quote_plus(str(verse))}"

    url = f"https://www.biblegateway.com/passage/?search={search}&version=CJB"
    return f'<iframe src="{url}" width="800" height="600"></iframe>'
def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1):
    """
    Initializes the Bible database with verse phrases and their gematria values.

    Creates the ``results`` and ``processed_books`` tables (plus an index on
    ``gematria_sum``) if they do not exist, then walks books 40-66. For every verse,
    each run of 1..``max_phrase_length`` consecutive words is stored with its
    diacritics-stripped text, gematria sum, book/chapter/verse and a global word
    position range ("first-last", counted continuously across all loaded books).

    A book is skipped when ``processed_books`` records it with a
    ``max_phrase_length`` at least as large as the requested one. Its words are still
    counted so that word positions of later books are identical to a fresh run.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length (in words) to process

    Returns:
        A dictionary mapping book ID to book title for every book whose JSON file
        could be loaded (including books that were skipped as already processed).
    """
    from contextlib import closing
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars

    logger.info(f"Initializing Bible database: {db_file}")

    # Process Bible files from books 40-66 (New Testament)
    book_start = 40
    book_end = 66

    # Global counter for word position tracking
    total_word_count = 0
    book_names = {}

    # sqlite3's own context manager only commits/rolls back; closing() also releases the connection.
    with closing(sqlite3.connect(db_file)) as conn:
        cursor = conn.cursor()

        # Create results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()

        logger.info(f"Processing Bible books {book_start}-{book_end}")

        # Process each book
        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            book_data = process_bible_files(book_id, book_id)
            if book_id not in book_data:
                logger.warning(f"No data found for book ID {book_id}")
                continue

            book_info = book_data[book_id]
            book_title = book_info['title']
            chapters = book_info['text']
            book_names[book_id] = book_title

            # Check if this book has already been processed
            cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
            processed = cursor.fetchone()
            if processed and processed[0] >= max_phrase_length:
                logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {processed[0]}")
                # Still advance the global counter; otherwise word_position (part of the
                # primary key) for later books would depend on which books were skipped.
                total_word_count += sum(
                    len(verse_text.split())
                    for chapter in chapters
                    for verse_text in chapter
                    if verse_text
                )
                continue

            phrases_to_insert = []
            for chapter_idx, chapter in enumerate(chapters, 1):
                for verse_idx, verse_text in enumerate(chapter, 1):
                    if not verse_text:
                        continue

                    # Split verse into words
                    words = verse_text.split()

                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))

                            # 1-based, inclusive global word positions of the phrase
                            word_position_range = (
                                f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            )

                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", book_title,
                                 chapter_idx, verse_idx, length, word_position_range)
                            )

                    # Update total word count after processing each verse
                    total_word_count += len(words)

            if not phrases_to_insert:
                continue

            # Batch insert, then record the book as processed in the same transaction.
            try:
                cursor.executemany('''
                    INSERT OR REPLACE INTO results
                    (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', phrases_to_insert)
                cursor.execute('''
                    INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                    VALUES (?, ?)
                ''', (book_title, max_phrase_length))
                conn.commit()
                logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
            except sqlite3.Error as e:
                # Discard the partial batch so it is not committed together with a later book.
                conn.rollback()
                logger.error(f"Database error processing {book_title}: {e}")

    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names
def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db', book: str = 'Revelation') -> Dict[str, Any]:
    """
    Finds the shortest stored phrase of one Bible book that matches the given gematria sum.

    "Shortest" means the smallest ``LENGTH(words)`` among the rows of the ``results``
    table with the requested ``gematria_sum`` and ``book``.

    Args:
        gematria_sum: The gematria sum to match
        db_file: The SQLite database file to search in
        book: Title of the book to search, as stored in the ``book`` column.
            Defaults to 'Revelation', the book this function has always queried.

    Returns:
        A dictionary with the keys 'words', 'book', 'chapter', 'verse',
        'phrase_length' and 'word_position', or None if no match is found or the
        database could not be queried (the error is logged, not raised).
    """
    from contextlib import closing

    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")
    try:
        # closing() releases the connection; sqlite3's own context manager would not.
        with closing(sqlite3.connect(db_file)) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = ?
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum, book))
            result = cursor.fetchone()
    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None

    if result is None:
        logger.debug(f"No matching verse found in {book} for gematria sum: {gematria_sum}")
        return None

    logger.debug(f"Found Bible match: {result}")
    # The SELECT always yields exactly six columns, in this order.
    words, matched_book, chapter, verse, phrase_length, word_position = result
    return {
        "words": words,
        "book": matched_book,
        "chapter": chapter,
        "verse": verse,
        "phrase_length": phrase_length,
        "word_position": word_position,
    }