Spaces:
Runtime error
Runtime error
| import sys | |
| from indicnlp import common | |
| common.set_resources_path(INDIC_NLP_RESOURCES) | |
| from indicnlp import loader | |
| from indicnlp.normalize import indic_normalize | |
| from indicnlp.transliterate import unicode_transliterate | |
# Devanagari virama; appended to consonant-final words before transliteration.
HALANT = '\u094d'

# Signs that occur in Hindi text and are remapped or dropped before the
# Hindi -> Kannada script conversion.
HINDI_ONLY_CHAR_MAP = str.maketrans({
    '\u0900': '\u0902',  # inverted chandrabindu -> anusvara
    '\u0901': '\u0902',  # chandrabindu -> anusvara
    # Chandra e / chandra o vowel signs are mapped to the "aa" vowel sign
    # (not to e / o) because that seems to be the general usage.
    '\u0945': '\u093e',
    '\u0949': '\u093e',
    '\u093c': None,      # nukta is removed
})


def remap_hindi_only_chars(line):
    """Return *line* with Hindi-only signs remapped or removed.

    Chandrabindus become anusvara, the chandra e/o vowel signs become the
    "aa" vowel sign, and nuktas are deleted. All other characters are
    returned unchanged.
    """
    return line.translate(HINDI_ONLY_CHAR_MAP)


def add_final_halant(line, is_consonant_char):
    """Append a halant to every space-separated word ending in a consonant.

    Args:
        line: space-tokenized text.
        is_consonant_char: callable taking a single character and returning
            a truthy value when that character is a consonant.

    Returns:
        The line with the same spacing, where each consonant-final word has
        U+094D appended. Empty tokens (blank line, repeated spaces) are kept
        as-is instead of being indexed, which would raise IndexError.
    """
    return ' '.join(
        word + HALANT if word and is_consonant_char(word[-1]) else word
        for word in line.split(' ')
    )


def main(argv):
    """Transliterate a Hindi text file to Kannada script.

    Removes/remaps characters only found in Hindi and adds a halant to
    words ending with a consonant - as is the convention in Kannada.

    Args:
        argv: command-line vector; argv[1] is the input file (one
            sentence/word per line, space-tokenized) and argv[2] is the
            output file. Both are read/written as UTF-8.

    Raises:
        SystemExit: if fewer than two file arguments are supplied.
    """
    # The file's import header does not bring `isc` into scope, so the
    # script-properties module is imported here.
    from indicnlp.script import indic_scripts as isc

    if len(argv) < 3:
        raise SystemExit('usage: {} <infile> <outfile>'.format(
            argv[0] if argv else 'script'))
    infname = argv[1]
    outfname = argv[2]

    # NOTE(review): the import header calls
    # common.set_resources_path(INDIC_NLP_RESOURCES), but that name is not
    # defined in the visible part of the file - confirm it is set.
    loader.load()
    normalizer = indic_normalize.IndicNormalizerFactory().get_normalizer('hi')
    transliterate = unicode_transliterate.UnicodeIndicTransliterator.transliterate

    def is_hindi_consonant(char):
        return isc.is_consonant(isc.get_phonetic_feature_vector(char, 'hi'))

    with open(infname, 'r', encoding='utf-8') as infile, \
            open(outfname, 'w', encoding='utf-8') as outfile:
        for line in infile:
            line = normalizer.normalize(line.strip())
            line = remap_hindi_only_chars(line)
            line = add_final_halant(line, is_hindi_consonant)
            # script conversion
            outfile.write(transliterate(line, 'hi', 'kn') + '\n')


if __name__ == '__main__':
    main(sys.argv)