Spaces:
Sleeping
Sleeping
| import re | |
# English abbreviation table: a list of (compiled pattern, replacement) pairs.
# Each pattern matches the abbreviation at a word boundary followed by a
# literal period, ignoring case; the replacement is the spelled-out form.
# NOTE(review): "misess" looks like a misspelling of "missus"; it is kept
# unchanged because the emitted text is runtime behavior -- confirm before
# altering it.
abbreviations_en = [
    (re.compile(r"\b%s\." % short_form, re.IGNORECASE), spoken_form)
    for short_form, spoken_form in (
        ("mrs", "misess"),
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    )
]
def expand_abbreviations(text: str, lang: str = "en") -> str:
    """Replace known abbreviations in ``text`` with their expanded forms.

    Every (pattern, replacement) pair of the selected language table is
    applied to the text in table order, each substitution operating on the
    result of the previous one.

    Args:
        text: The string to process.
        lang: Language code selecting the abbreviation table. Only ``"en"``
            is supported.

    Returns:
        The text with all matching abbreviations substituted.

    Raises:
        NotImplementedError: If ``lang`` is anything other than ``"en"``.
    """
    if lang != "en":
        # Name the rejected language so the caller can see what was asked for.
        raise NotImplementedError(
            "abbreviation expansion is not implemented for language %r" % (lang,)
        )
    for pattern, replacement in abbreviations_en:
        # The table holds compiled patterns, so call their own sub() directly.
        text = pattern.sub(replacement, text)
    return text