Spaces:

neuralworm
/

daily_psalm

Sleeping

App Files Files Community

daily_psalm / bible.py

neuralworm

Update bible.py

6d08b4f verified 6 months ago

raw

history blame contribute delete

15.1 kB

	import json
	import logging
	import os
	import re
	import sqlite3
	from typing import Any, Dict, List, Optional

	from deep_translator import GoogleTranslator

	from gematria import calculate_gematria, strip_diacritics

	logger = logging.getLogger(__name__)

	def process_bible_files(start: int, end: int, base_path: str = "texts/bible") -> Dict[int, Dict[str, Any]]:
	    """
	    Load Bible book JSON files and return their title and text keyed by book ID.

	    Each book is read from ``{base_path}/{book_id}.json``. Books whose file is
	    missing, unreadable, not valid JSON, empty, or not a JSON object are
	    skipped; every case except an empty document is logged as a warning.

	    Args:
	        start: The starting book ID (inclusive).
	        end: The ending book ID (inclusive). If smaller than ``start`` the
	            result is empty.
	        base_path: Directory containing the ``<book_id>.json`` files.

	    Returns:
	        A dictionary where keys are book IDs and values are dictionaries
	        containing 'title' (default "No title") and 'text' (default []).
	    """
	    results: Dict[int, Dict[str, Any]] = {}

	    for book_id in range(start, end + 1):
	        file_name = f"{base_path}/{book_id}.json"

	        # Only the read/parse step sits inside the try, so each warning
	        # reflects a genuine I/O or decoding problem.
	        try:
	            with open(file_name, 'r', encoding='utf-8') as file:
	                data = json.load(file)
	        except FileNotFoundError:
	            logger.warning(f"File {file_name} not found.")
	            continue
	        except json.JSONDecodeError as e:
	            logger.warning(f"File {file_name} could not be read as JSON: {e}")
	            continue
	        except Exception as e:
	            logger.warning(f"Error processing {file_name}: {e}")
	            continue

	        if not data:
	            # Empty document: nothing to store.
	            continue

	        if not isinstance(data, dict):
	            # A list or scalar root has no .get(); report it explicitly.
	            logger.warning(
	                f"Error processing {file_name}: expected a JSON object, got {type(data).__name__}"
	            )
	            continue

	        results[book_id] = {
	            "title": data.get("title", "No title"),
	            "text": data.get("text", []),
	        }

	    return results

	def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True,
	                       strip_diacritics_value=True, translate=False):
	    """
	    Processes Bible JSON files and performs ELS (Equidistant Letter Sequence) search.

	    For every book in ``start``..``end`` the verses are concatenated,
	    lower-cased and optionally cleaned. For every non-zero round, every
	    ``abs(round * step)``-th character is picked, starting at the first
	    character for positive rounds and at the last character (walking
	    backwards) for negative rounds.

	    Parameters:
	    - start (int): Start number of the Bible book.
	    - end (int): End number of the Bible book.
	    - step (int): Step size for character selection. Must be non-zero.
	    - rounds (str | int): Comma-separated list of round numbers (can include
	      negative values); a single int is accepted too. Rounds equal to 0 are skipped.
	    - length (int): Maximum length of the result text (0 = unlimited).
	    - tlang (str): Target language for translation.
	    - strip_spaces (bool): Whether to remove spaces from the text.
	    - strip_in_braces (bool): Whether to remove text within [...] or (...).
	    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
	    - translate (bool): Whether to translate the result text (only done when
	      ``tlang`` is not "en").

	    Returns:
	    - list | None: A list of dictionaries containing processed data or error
	      messages, or None when no entry at all was produced (e.g. every round is 0).
	    """
	    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
	    results = []

	    try:
	        bible_data = process_bible_files(start, end)

	        if not bible_data:
	            return [{"error": f"No Bible data found for books {start}-{end}"}]

	        # str() lets callers pass a bare int such as rounds=2.
	        rounds_list = [int(r.strip()) for r in str(rounds).split(",")]

	        for book_id, book_info in bible_data.items():
	            book_title = book_info.get("title", "Unknown")
	            chapters = book_info.get("text", [])

	            if not chapters:
	                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
	                continue

	            # Flatten the text: every non-empty verse followed by one space.
	            flattened_text = "".join(
	                verse + " " for chapter in chapters for verse in chapter if verse
	            )

	            # Clean the text based on parameters
	            processed_text = flattened_text.lower()

	            if strip_in_braces:
	                # Remove content within brackets or parentheses (non-greedy,
	                # so each bracketed span is removed on its own).
	                processed_text = re.sub(r'\[.*?\]|\(.*?\)', '', processed_text)

	            if strip_diacritics_value:
	                processed_text = strip_diacritics(processed_text)

	            if strip_spaces:
	                processed_text = processed_text.replace(" ", "")

	            # Perform ELS search for each round
	            for round_num in rounds_list:
	                if round_num == 0:
	                    continue

	                abs_step = abs(round_num * step)
	                if abs_step == 0:
	                    # Reported through the error entry built by the handler below.
	                    raise ValueError("step must be non-zero")

	                if round_num > 0:
	                    # Forward ELS: indices 0, abs_step, 2*abs_step, ...
	                    result_text = processed_text[::abs_step]
	                else:
	                    # Backward ELS: indices len-1, len-1-abs_step, ...
	                    result_text = processed_text[::-abs_step]

	                # Truncate result if length is specified
	                if length > 0 and len(result_text) > length:
	                    result_text = result_text[:length]

	                # Translate if requested
	                translated_text = ""
	                if result_text and translate and tlang != "en":
	                    try:
	                        translator = GoogleTranslator(source='auto', target=tlang)
	                        translated_text = translator.translate(result_text)
	                    except Exception as e:
	                        # Best effort: a failed translation must not drop the ELS result.
	                        logger.warning(f"Translation error: {e}")
	                        translated_text = f"Translation error: {str(e)}"

	                # Add result to results list
	                results.append({
	                    "book_id": book_id,
	                    "book_title": book_title,
	                    "step": step,
	                    "round": round_num,
	                    "result_text": result_text,
	                    "translated_text": translated_text,
	                    "gematria": calculate_gematria(result_text)
	                })

	    except Exception as e:
	        logger.error(f"Error processing Bible files: {e}", exc_info=True)
	        results.append({"error": f"Error processing Bible files: {str(e)}"})

	    # Existing contract: an empty outcome is reported as None, not [].
	    return results or None

	# This function is not needed anymore as we're using get_first_els_result_matthew from app.py
	# Keeping the definition for compatibility but marking it as deprecated
	def get_first_els_result_john(gematria_sum, tlang="en"):
	    """
	    DEPRECATED: Use get_first_els_result_matthew instead.

	    Logs a deprecation warning and delegates to
	    ``app.get_first_els_result_matthew(gematria_sum, tlang)``, returning
	    whatever that function returns.
	    """
	    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")

	    # Function-scope import, presumably to avoid a circular import with app.py
	    # (verify against app.py). Only the name actually used is imported.
	    from app import get_first_els_result_matthew

	    return get_first_els_result_matthew(gematria_sum, tlang)

	def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
	    """
	    Creates an iframe HTML string for BibleGateway.

	    Args:
	        book_title: Book name used in the BibleGateway search query.
	        book_id: Numeric book ID; only used for debug logging.
	        chapter: Optional chapter, appended to the query as "+<chapter>".
	        verse: Optional verse, appended as ":<verse>". Only used when
	            ``chapter`` is given.

	    Returns:
	        An ``<iframe>`` element (800x600) whose ``src`` is the BibleGateway
	        passage URL for the CJB version.
	    """
	    from urllib.parse import quote_plus

	    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")

	    passage = quote_plus(book_title)
	    if chapter is not None:
	        # URL-encode chapter/verse as well, so characters such as '"' or '&'
	        # cannot break out of the query string or the src attribute.
	        # Plain integers are left unchanged by quote_plus.
	        passage += f"+{quote_plus(str(chapter))}"
	        if verse is not None:
	            passage += f":{quote_plus(str(verse))}"

	    url = f"https://www.biblegateway.com/passage/?search={passage}&version=CJB"
	    return f'<iframe src="{url}" width="800" height="600"></iframe>'

	def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1,
	                              book_start: int = 40, book_end: int = 66) -> Dict[int, str]:
	    """
	    Initializes the Bible database with verse texts.

	    Creates the ``results`` and ``processed_books`` tables if needed, then for
	    every book in ``book_start``..``book_end`` inserts each phrase of 1 to
	    ``max_phrase_length`` consecutive words of every verse together with its
	    gematria value. Books already recorded in ``processed_books`` with an
	    equal or larger ``max_phrase_length`` are skipped.

	    Args:
	        db_file: The SQLite database file to use
	        max_phrase_length: Maximum phrase length to process
	        book_start: First book ID to process (inclusive).
	        book_end: Last book ID to process (inclusive).

	    Returns:
	        Mapping of book ID to book title for every book whose JSON file was
	        loaded (including books that were skipped as already processed).
	    """
	    from gematria import calculate_gematria, strip_diacritics
	    from tqdm import tqdm  # Import tqdm for progress bars

	    logger.info(f"Initializing Bible database: {db_file}")

	    # sqlite3's connection context manager only commits/rolls back; it does
	    # not close. Use one connection and close it explicitly.
	    conn = sqlite3.connect(db_file)
	    try:
	        cursor = conn.cursor()

	        # Create results table
	        cursor.execute('''
	        CREATE TABLE IF NOT EXISTS results (
	        gematria_sum INTEGER,
	        words TEXT,
	        translation TEXT,
	        book TEXT,
	        chapter INTEGER,
	        verse INTEGER,
	        phrase_length INTEGER,
	        word_position TEXT,
	        PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
	        )
	        ''')

	        cursor.execute('''
	        CREATE INDEX IF NOT EXISTS idx_results_gematria
	        ON results (gematria_sum)
	        ''')

	        # Create processed_books table to track processing
	        cursor.execute('''
	        CREATE TABLE IF NOT EXISTS processed_books (
	        book TEXT PRIMARY KEY,
	        max_phrase_length INTEGER
	        )
	        ''')

	        conn.commit()

	        logger.info(f"Processing Bible books {book_start}-{book_end}")

	        # Running word counter across all books, used for word_position.
	        total_word_count = 0
	        book_names: Dict[int, str] = {}

	        # Process each book
	        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
	            book_data = process_bible_files(book_id, book_id)

	            if book_id not in book_data:
	                logger.warning(f"No data found for book ID {book_id}")
	                continue

	            book_info = book_data[book_id]
	            book_title = book_info['title']
	            book_names[book_id] = book_title
	            chapters = book_info['text']

	            # Check if this book has already been processed
	            cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
	            result = cursor.fetchone()
	            if result and result[0] >= max_phrase_length:
	                logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {result[0]}")
	                # Still advance the counter, so later books get the same
	                # word positions as in a run where nothing is skipped.
	                total_word_count += sum(
	                    len(verse_text.split())
	                    for chapter in chapters
	                    for verse_text in chapter
	                    if verse_text
	                )
	                continue

	            phrases_to_insert = []

	            for chapter_idx, chapter in enumerate(chapters, 1):
	                for verse_idx, verse_text in enumerate(chapter, 1):
	                    if not verse_text:
	                        continue

	                    # Split verse into words
	                    words = verse_text.split()

	                    # Process phrases of different lengths
	                    for length in range(1, max_phrase_length + 1):
	                        for start in range(len(words) - length + 1):
	                            phrase = " ".join(words[start:start + length])
	                            cleaned_phrase = strip_diacritics(phrase)
	                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))

	                            # 1-based positions of the first and last word of the phrase
	                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"

	                            phrases_to_insert.append(
	                                (gematria_sum, cleaned_phrase, "", book_title, chapter_idx, verse_idx, length, word_position_range)
	                            )

	                    # Update total word count after processing each verse
	                    total_word_count += len(words)

	            # If we have phrases to insert, do a batch insert
	            if phrases_to_insert:
	                try:
	                    cursor.executemany('''
	                    INSERT OR REPLACE INTO results
	                    (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
	                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
	                    ''', phrases_to_insert)

	                    # Update the processed_books table
	                    cursor.execute('''
	                    INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
	                    VALUES (?, ?)
	                    ''', (book_title, max_phrase_length))

	                    conn.commit()
	                    logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
	                except sqlite3.Error as e:
	                    # Discard the partial batch so it is not committed
	                    # together with the next book.
	                    conn.rollback()
	                    logger.error(f"Database error processing {book_title}: {e}")
	    finally:
	        conn.close()

	    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
	    return book_names

	def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db',
	                              book: str = 'Revelation') -> Optional[Dict[str, Any]]:
	    """
	    Finds the shortest stored phrase of one Bible book that matches the given gematria sum.

	    "Shortest" means the smallest character length of the ``words`` column
	    in the ``results`` table.

	    Args:
	        gematria_sum: The gematria sum to match
	        db_file: The SQLite database file to search in
	        book: Title of the book to search, as stored in the ``book`` column.

	    Returns:
	        A dictionary with the keys 'words', 'book', 'chapter', 'verse',
	        'phrase_length' and 'word_position', or None if no match is found
	        or the lookup fails (failures are logged, not raised).
	    """
	    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")

	    try:
	        conn = sqlite3.connect(db_file)
	        try:
	            result = conn.execute('''
	            SELECT words, book, chapter, verse, phrase_length, word_position
	            FROM results
	            WHERE gematria_sum = ? AND book = ?
	            ORDER BY LENGTH(words) ASC
	            LIMIT 1
	            ''', (gematria_sum, book)).fetchone()
	        finally:
	            # The connection context manager would not close it; do so explicitly.
	            conn.close()
	    except sqlite3.Error as e:
	        logger.error(f"Database error when finding Bible match: {e}")
	        return None
	    except Exception as e:
	        logger.error(f"Unexpected error when finding Bible match: {e}")
	        return None

	    if result is None:
	        logger.debug(f"No matching verse found in {book} for gematria sum: {gematria_sum}")
	        return None

	    logger.debug(f"Found Bible match: {result}")
	    columns = ("words", "book", "chapter", "verse", "phrase_length", "word_position")
	    return dict(zip(columns, result))