File size: 4,651 Bytes
279ed8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import sys
import codecs
import torch
from transformers import M2M100ForConditionalGeneration, NllbTokenizerFast

def translate_text(text, model, tokenizer, src_lang, target_lang="eng_Latn"):
    """
    Translates a single text string.

    Args:
        text: The source-language string to translate.
        model: A seq2seq model exposing ``generate(**inputs, ...)``.
        tokenizer: The tokenizer paired with ``model``. Its ``src_lang``
            attribute is overwritten with ``src_lang`` on every call.
        src_lang: Language code of ``text`` as understood by the tokenizer.
        target_lang: Language code to translate into. It must be a key of
            ``tokenizer.vocab``.

    Returns:
        The decoded translation. If anything fails, the exception is not
        raised; a string starting with "An error occurred during translation:"
        is returned instead.
    """
    try:
        tokenizer.src_lang = src_lang
        inputs = tokenizer(text, return_tensors="pt")

        # The input tensors must live on the same device as the model weights;
        # otherwise generate() fails as soon as the model has been moved to a
        # GPU. Models that do not expose `.device` are left untouched.
        device = getattr(model, "device", None)
        if device is not None:
            inputs = inputs.to(device)

        generated_tokens = model.generate(
            **inputs,
            # Forcing the first generated token to the target language code is
            # how the output language is selected.
            forced_bos_token_id=tokenizer.vocab[target_lang],
            max_length=512,
        )
        # A single input yields a single-row batch; return its only element.
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        # Best-effort by design: callers print the result, so report the
        # failure as text rather than aborting the whole run.
        return f"An error occurred during translation: {e}"

def _print_translations(sentences, lang_tag, model, tokenizer, src_lang):
    """
    Translates each sentence to English and prints the original next to the result.
    """
    for sentence in sentences:
        print(f"\nOriginal ({lang_tag}): {sentence}")
        translated_text = translate_text(sentence, model, tokenizer, src_lang=src_lang)
        print(f"Translated (en): {translated_text}")


def main():
    """
    Main function to load the model and run a test translation.

    Loads the fine-tuned checkpoint from ``models/nllb-finetuned-nepali-en``
    next to this script, then prints English translations for a fixed set of
    Nepali and Sinhala sample sentences. Returns early, after printing the
    error, if the model or tokenizer cannot be loaded.
    """
    # Make stdout emit UTF-8 so the Devanagari/Sinhala samples print on any
    # console. reconfigure() keeps the stream's buffering and newline handling;
    # the codecs wrapper is only a fallback for streams without it.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")
    else:
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

    # --- Configuration ---
    script_dir = os.path.dirname(os.path.abspath(__file__))
    nepali_model_path = os.path.join(script_dir, "models", "nllb-finetuned-nepali-en")

    # --- Model Loading ---
    print("Loading Nepali model and tokenizer...")
    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        nepali_model = M2M100ForConditionalGeneration.from_pretrained(nepali_model_path).to(device)
        nepali_tokenizer = NllbTokenizerFast.from_pretrained(nepali_model_path)
        print("Nepali model and tokenizer loaded successfully.")
    except Exception as e:
        print(f"Error loading Nepali model or tokenizer: {e}")
        return

    # --- Nepali Translation ---
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated into a single sentence otherwise.
    nepali_sentences = [
        "जडान बिन्दु थप्नुहोस्",
        "स्टिकी नोट आयात पूरा भयो",
        "मोनोस्पेस १२",
        "पानी जेट पम्पमा दुईवटा भित्रिने र एउटा बाहिरिने पाइप हुन्छन् र एक भित्र अर्को सिद्धान्त अनुरूप दुईवटा पाइप हुन्छन् । पानीको प्रविष्टिमा एउटा पानी जेटले केही ठूलो पाइपमा पूरा चापले टुटीबाट बाहिर फाल्दछ । यस्तो तरिकाले पानी जेटले वायू वा तरललाई दोस्रो प्रविष्टिबाट टाढा पुर्याउदछ । ड्रिफ्टिङ तरलमा ऋणात्मक चापको कारणले यस्तो हुन्छ । त्यसैले यो हाइड्रोडायनमिक विरोधाभाषको एउटा अनुप्रयोग हो । यसले ड्रिफ्टिङ तरल नजिकका वस्तु टाढा फाल्नुको साटोमा सोस्ने कुरा बताउदछ ।",
        "वस्तुको परिवर्तन बचत गर्नुहोस् ।",
        "तिमीलाई कस्तो छ",
        "तिमी को हौ",
        "कति बज्यो",
    ]

    print("\n--- Nepali to English Translation Analysis ---")
    # NOTE(review): the stock NLLB language list names Nepali "npi_Deva".
    # "nep_Npan" is kept as-is because the fine-tuned checkpoint may have been
    # trained with this tag; confirm against the training setup.
    _print_translations(
        nepali_sentences, "ne", nepali_model, nepali_tokenizer, src_lang="nep_Npan"
    )

    # --- Sinhala Translation ---
    # NOTE: No fine-tuned model for Sinhala was found, so the Nepali
    # fine-tuned model and tokenizer loaded above are reused here with the
    # Sinhala source-language code.
    print("\n\n--- Sinhala to English Translation Analysis ---")

    sinhala_sentences = [
        "ඩෝසන්මිස් දුරකථනයෙන් ඩෝසන්මිස් කවුද සර්",
        "කවුද ඩෝසන් නැතුව ඉන්නේ ඔව් සර්",
        "ඔබ එය උත්සාහ කරන්න සර්",
        "කොහොමද වැඩේ හරිද ඔව් සර්ට ස්තුතියි",
        "ඔව්, හරි, ස්තුතියි රත්තරං",
    ]

    _print_translations(
        sinhala_sentences, "si", nepali_model, nepali_tokenizer, src_lang="sin_Sinh"
    )


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()