Spaces:

dindizz
/

sandhisplitter

Sleeping

App Files Files Community

sandhisplitter / app.py

dindizz

Upload 2 files

f120f79 verified 3 months ago

raw

history blame contribute delete

1.96 kB

	import gradio as gr

	# --- Seed Sanskrit vocabulary (placeholder; extend with real data) ---
	# A split is only reported when both of its halves are members of this set.
	LEXICON = {
	    "राम",       # rāma
	    "वन",        # vana
	    "गच्छति",     # gacchati
	    "गुरु",       # guru
	    "इन्द्र",      # indra
	    "तत्",       # tat
	    "अपि",       # api
	    "धर्म",       # dharma
	    "क्षेत्र",      # kṣetra
	    "कुरु",       # kuru
	    "क्षेत्रे",      # kṣetre
	}

	# --- Reverse sandhi table ---
	# Each entry pairs a surface ending with its possible "left+right" expansions.
	# When the left fragment of a split ends with the surface ending, the text
	# before "+" replaces that ending and the text after "+" is prefixed to the
	# right fragment.
	REVERSE_SANDHI_RULES = [
	    # ā → a + a
	    ("ा", ["अ+अ"]),
	    # e → a + i  or  a + ī
	    ("े", ["अ+इ", "अ+ई"]),
	    # o → a + u  or  a + ū
	    ("ो", ["अ+उ", "अ+ऊ"]),
	    # visarga restoration
	    ("ः", ["ः+"]),
	]

	def generate_candidates(word):
	    """Return candidate two-part splits of ``word``.

	    Every internal cut position is tried. A cut is accepted directly when
	    both halves are in ``LEXICON``; otherwise each rule in
	    ``REVERSE_SANDHI_RULES`` whose surface ending terminates the left half
	    is undone and the restored halves are looked up instead.

	    Args:
	        word: The Devanagari string to split.

	    Returns:
	        A list of unique ``(left, right)`` tuples in the order they were
	        discovered, or ``[("No plausible split found", "")]`` when nothing
	        matches (including for empty or single-character input).
	    """
	    # In Devanagari a consonant with no vowel sign already carries the
	    # inherent "a". Once the vowel sign is stripped from the left stem,
	    # restoring a final "a" therefore means appending nothing: appending the
	    # independent letter would yield a spelling that never matches the lexicon.
	    inherent_a = "अ"
	    # A dict is used as an insertion-ordered set so that duplicates are
	    # dropped while the result order stays stable from run to run.
	    found = {}
	    for i in range(1, len(word)):
	        left, right = word[:i], word[i:]
	        # Direct split: both halves are already known words.
	        if left in LEXICON and right in LEXICON:
	            found[(left, right)] = None
	        # Undo each sandhi rule whose surface ending terminates the left half.
	        for ending, expansions in REVERSE_SANDHI_RULES:
	            if not left.endswith(ending):
	                continue
	            stem = left[:len(left) - len(ending)]
	            for expansion in expansions:
	                left_tail, _, right_head = expansion.partition("+")
	                if left_tail == inherent_a:
	                    left_tail = ""
	                l_base = stem + left_tail
	                r_base = right_head + right
	                if l_base in LEXICON and r_base in LEXICON:
	                    found[(l_base, r_base)] = None
	    return list(found) or [("No plausible split found", "")]

	def sandhi_splitter(word):
	    """Format the candidate splits of ``word`` for display, one per line.

	    Args:
	        word: Text entered by the user; ``None`` is treated as an empty
	            string and surrounding whitespace is ignored.

	    Returns:
	        A newline-separated string in which the parts of each candidate are
	        joined by " + ".
	    """
	    candidates = generate_candidates((word or "").strip())
	    # Empty parts are skipped so that the "no result" placeholder, whose
	    # second element is "", is shown without a dangling " + " separator.
	    formatted = [" + ".join(part for part in split if part) for split in candidates]
	    return "\n".join(formatted)

	# --- Gradio UI: one input box, one output box, and a button that runs the splitter ---
	with gr.Blocks() as demo:
	    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
	    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")
	    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
	    out = gr.Textbox(label="Candidate Splits")
	    btn = gr.Button("Split Sandhi")
	    # Clicking the button sends the input text through sandhi_splitter and
	    # writes the returned string into the output box.
	    btn.click(sandhi_splitter, inputs=inp, outputs=out)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()