Spaces:
Sleeping
Sleeping
| import gradio as gr | |
# --- Minimal Sanskrit lexicon (extend with real data) ---
# Word forms in Devanagari. A candidate split is only reported when both of
# its halves are members of this set.
LEXICON = {
    "राम",       # rāma
    "वन",        # vana
    "गच्छति",     # gacchati
    "गुरु",       # guru
    "इन्द्र",      # indra
    "तत्",       # tat
    "अपि",       # api
    "धर्म",       # dharma
    "क्षेत्र",      # kṣetra
    "कुरु",       # kuru
    "क्षेत्रे",      # kṣetre
}
# --- Basic Reverse Sandhi Rules ---
# Each entry is (surface_sign, expansions). `surface_sign` is the character
# looked for at the end of the left fragment; every expansion is a
# "left_piece+right_piece" string describing what that sign may be undone to.
REVERSE_SANDHI_RULES = [
    # ā → a + a
    ("ा", ["अ+अ"]),
    # e → a + i  or  a + ī
    ("े", ["अ+इ", "अ+ई"]),
    # o → a + u  or  a + ū
    ("ो", ["अ+उ", "अ+ऊ"]),
    # visarga restoration (the right piece is empty)
    ("ः", ["ः+"]),
]
# Dependent (matra) spelling of each independent vowel letter, i.e. how the
# vowel is written when it follows a consonant.
_VOWEL_SIGNS = {
    "\u0905": "",        # a  - already carried by the bare consonant
    "\u0906": "\u093e",  # ā
    "\u0907": "\u093f",  # i
    "\u0908": "\u0940",  # ī
    "\u0909": "\u0941",  # u
    "\u090a": "\u0942",  # ū
    "\u090b": "\u0943",  # ṛ
    "\u090f": "\u0947",  # e
    "\u0910": "\u0948",  # ai
    "\u0913": "\u094b",  # o
    "\u0914": "\u094c",  # au
}


def _ends_in_consonant(text):
    """Return True when *text* ends in a Devanagari consonant letter."""
    if not text:
        return False
    last = text[-1]
    # U+0915..U+0939 are the consonant letters; U+0958..U+095F are the
    # precomposed nukta consonants.
    return "\u0915" <= last <= "\u0939" or "\u0958" <= last <= "\u095f"


def _attach(stem, tail):
    """Append *tail* to *stem*, spelling a vowel as Devanagari writes it."""
    if tail in _VOWEL_SIGNS and _ends_in_consonant(stem):
        # After a consonant a vowel is written as its dependent sign, and the
        # short "a" is not written at all (राम, never रामअ).
        return stem + _VOWEL_SIGNS[tail]
    return stem + tail


def generate_candidates(word, *, lexicon=None, rules=None):
    """Return the two-way splits of *word* whose halves are both known words.

    Every cut position is tried twice: once as a plain split, and once with
    each reverse-sandhi rule whose sign ends the left fragment undone.

    Args:
        word: The compound to analyse (Devanagari string).
        lexicon: Container of accepted word forms; defaults to ``LEXICON``.
        rules: Iterable of ``(sign, ["left+right", ...])`` pairs; defaults to
            ``REVERSE_SANDHI_RULES``.

    Returns:
        A list of ``(left, right)`` tuples in discovery order without
        duplicates, or ``[("No plausible split found", "")]`` when nothing
        matched.
    """
    lexicon = LEXICON if lexicon is None else lexicon
    rules = REVERSE_SANDHI_RULES if rules is None else rules

    candidates = []
    for i in range(1, len(word)):
        left, right = word[:i], word[i:]

        # Direct split
        if left in lexicon and right in lexicon:
            candidates.append((left, right))

        # Apply reverse sandhi substitutions
        for sign, expansions in rules:
            if not left.endswith(sign):
                continue
            stem = left[: len(left) - len(sign)]
            for exp in expansions:
                left_tail, _, right_head = exp.partition("+")
                l_base = _attach(stem, left_tail)
                r_base = right_head + right
                if l_base in lexicon and r_base in lexicon:
                    candidates.append((l_base, r_base))

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is the same on every run (set ordering is not).
    unique = list(dict.fromkeys(candidates))
    return unique or [("No plausible split found", "")]
def sandhi_splitter(word):
    """Format the candidate splits of *word* for display, one per line.

    Args:
        word: Text from the input box; surrounding whitespace is ignored and
            ``None`` is treated as an empty string.

    Returns:
        A newline-separated string in which each line is a candidate written
        as ``left + right``.
    """
    candidates = generate_candidates((word or "").strip())
    # Empty components are skipped so that the ("No plausible split found", "")
    # placeholder is shown as a plain message rather than with a dangling " + ".
    lines = [" + ".join(part for part in pair if part) for pair in candidates]
    return "\n".join(lines)
# Gradio front end: a heading, one input box, one output box, and a button
# that feeds the input through sandhi_splitter.
with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")

    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")

    btn = gr.Button("Split Sandhi")
    btn.click(sandhi_splitter, inp, out)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()