File size: 16,606 Bytes
18a28ff
9f0a178
d30b281
f7cc9ad
8b05c02
7bcabe0
6cc39d6
261cbe8
d120873
e46dbee
 
eaf1d7d
 
 
75255bc
9346da7
de649ff
 
 
75255bc
d1936ad
 
 
 
 
 
 
d120873
c1dac64
4e5cd58
 
eaec419
d3a5771
4e5cd58
 
 
eaec419
4e5cd58
 
53869cd
1d71a28
a696db8
 
c3a14f0
 
d3a5771
75255bc
4e5cd58
 
 
 
 
 
 
1d71a28
75255bc
d120873
42c14b6
75255bc
698fb3f
75255bc
d120873
eaf1d7d
 
 
 
564da3b
42c14b6
eaf1d7d
14f07c3
eaf1d7d
3f6a5ed
eaf1d7d
14f07c3
9346da7
de649ff
 
 
 
 
 
7db8a97
2ee9294
14f07c3
4e95015
76e12db
 
 
 
 
 
 
 
 
 
 
e32216a
76e12db
eaada07
76e12db
 
 
 
 
b1ec0c8
 
 
 
 
 
6168b17
b1ec0c8
 
 
 
e747345
 
 
 
 
7794bc3
e747345
 
76e12db
2467a42
 
261cbe8
76e12db
e32216a
7baab5f
 
 
 
 
 
 
 
 
 
 
d43d990
dc27630
261cbe8
3aaf079
0a92611
 
 
05b4630
0a92611
 
 
 
 
 
05b4630
 
0a92611
76e12db
 
 
 
 
 
 
2467a42
 
 
76e12db
 
261cbe8
76e12db
 
 
 
 
 
 
 
 
 
 
 
 
 
0a92611
05b4630
 
0a92611
eec15ee
f873413
76e12db
 
eb10e5e
678e936
f873413
0734240
ebbb8ba
 
d43d990
05b4630
d43d990
 
c9781a7
 
 
67b893a
d43d990
fd04a0b
05b4630
 
515b269
a06c4f8
5301423
55ecfe2
 
5301423
55ecfe2
5f33eaf
3dddf92
 
5301423
 
55ecfe2
5301423
3dddf92
5f33eaf
0bb264b
 
edfc533
3dddf92
d5c671b
 
0519180
7ebabfa
0a92611
05b4630
 
198b77a
0a92611
4c7bc2b
40d6045
d1936ad
98d0958
9797698
d1936ad
 
9797698
6cc39d6
4e95015
8b05c02
 
1d71a28
8b05c02
27ced65
c832cd2
53869cd
261cbe8
8b05c02
a696db8
4e95015
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
from fastapi import FastAPI, Request, Query
import src.Paraphrase as Paraphrase
import src.Translate
from typing import Optional
from fastapi_mcp import FastApiMCP
from huggingface_hub import hf_hub_download, list_repo_files
from sentence_transformers import SentenceTransformer
import bergamot

# The single FastAPI application object; every route below is registered on it.
app = FastAPI()
# app = FastAPI(docs_url="/docs")
# Short alias -> Hugging Face model id for the translation / paraphrase models.
# The endpoints below look entries up by alias (e.g. MODELS['mttcbig']) to pick
# defaults and to decide which translation backend handles a given model id.
MODELS = {'benro': 'BlackKakapo/opus-mt-en-ro',
          'broen': 'BlackKakapo/opus-mt-ro-en',
          'mttcbig': 'Helsinki-NLP/opus-mt-tc-big-en-ro',
          'gemma': 'Gargaz/gemma-2b-romanian-better',
          'mbartenro': 'ancebuc/mbart-translation-en-ro',
          't5enro': 'ancebuc/t5-translation-en-ro',
          'pegasus': 'ancebuc/pegasus-translation-en-ro',
          'mbart': 'facebook/mbart-large-cc25',
          'paraphrase': 'tuner007/pegasus_paraphrase'}
# Sentence-embedding model id -> integer.
# NOTE(review): the integers (384 / 512 / 768) are presumably the embedding
# dimension of each model; nothing in this file reads them — TODO confirm.
EMBEDDING_MODELS = {"all-MiniLM-L6-v2":384,
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2":384,
"sentence-transformers/distiluse-base-multilingual-cased-v2":512,
"sentence-transformers/stsb-xlm-r-multilingual":768,
"sentence-transformers/use-cmlm-multilingual":768,
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2":768}
# Default model id for the /embed endpoint.
EMBEDDING_MODEL = "sentence-transformers/distiluse-base-multilingual-cased-v2"

@app.get("/")
def index(request: Request):
    # Landing page: renders a small HTML document that links to the interactive
    # docs, the OpenAPI schema and the MCP endpoint of this deployment, and
    # lists the configured model ids.
    #
    # Parameters:
    #     request: the incoming request; only its network location is used, to
    #              build absolute links back to this host.
    # Returns:
    #     HTMLResponse containing the rendered page.
    from html import escape
    from fastapi.responses import HTMLResponse

    # The network location is taken from the incoming request, so escape it
    # before interpolating it into markup and href attributes.
    host_url = escape("https://" + request.url.netloc, quote=True)
    mcp_config = '''{"mcpServers": {"fastapi-mcp": {"url": "https://tiberiucristianleon-fastapimt.hf.space/mcp"}}}'''
    html_content = f'''
        <html>
            <head>
                <title>FastAPI with MCP</title>
            </head>
            <body>
                <h2>FastAPI URLS</h2>
                <p><a href="{host_url}" target="_blank">Host URL:</a> {host_url}</p>
                <p><a href="{host_url}/docs" target="_blank">DOCS</a></p>
                <p><a href="{host_url}/redoc" target="_blank">REDOC</a></p>
                <p><a href="{host_url}/openapi.json" target="_blank">openapi.json</a></p>
                <p><a href="{host_url}/mcp" target="_blank">MCP</a></p>
                <p>MCP configuration: {mcp_config}</p>
                <p>MODELS: {list(MODELS.values())}</p>
            </body>
        </html>
        '''
    return HTMLResponse(content=html_content)

# @app.get("/")
# async def get_host_url(request: Request):
#     host_url = request.url.scheme + "s://" + request.url.netloc
#     return {"host_url": host_url, 'endpoints': ['/paraphrase', '/translate', f'{host_url}/docs', f'{host_url}/redoc', f'{host_url}/openapi.json'], 'models': MODELS}

@app.get("/paraphrase", operation_id="get_paraphrase", description="Paraphrase text", tags=["paraphrase"], summary="Paraphrase text")
def paraphrase(text: str, model: str  = MODELS['paraphrase']):
    """Paraphrase ``text`` with the given model.

    Parameters:
        text (str): The text to paraphrase.
        model (str): Model id handed to the paraphrase helper; defaults to MODELS['paraphrase'].
    Returns:
        dict: ``input`` (the original text) plus ``result`` and ``exception``,
        the two values returned by ``Paraphrase.paraphraseParaphraseMethod``.
    """
    outcome = Paraphrase.paraphraseParaphraseMethod(text, model)
    paraphrased, error = outcome
    payload = {"input": text}
    payload["result"] = paraphrased
    payload["exception"] = error
    return payload

@app.get("/listmodels", operation_id="list_models", description="List models", tags=["listmodels"], summary="List models")
def listmodels():
    """Return both model registries defined at module level.

    Returns:
        dict: ``MODELS`` (the translation / paraphrase registry) and
        ``EMBEDDING_MODELS`` (the embedding registry), unchanged.
    """
    registries = {}
    registries["MODELS"] = MODELS
    registries["EMBEDDING_MODELS"] = EMBEDDING_MODELS
    return registries

# model: Optional[str] = MODELS['benro']
@app.get("/translate", operation_id="get_translate", description="Translate text", tags=["translate"], summary="Translate text")
def translate(input_text: str, model_name: str = MODELS['mttcbig'], sl: str = 'en', tl: str = 'ro'):
    """Translate ``input_text`` by dispatching to a backend chosen from ``model_name``.

    Parameters:
        input_text (str): The text to translate.
        model_name (str): Model id; selects the backend (see the branches below).
        sl (str): Source language code.
        tl (str): Target language code.
    Returns:
        dict: ``input_text``, ``translation``, ``model_name`` and ``message``.
    """
    message = f'Translated from {sl} to {tl} with {model_name}'
    # Model ids that are all served by the generic translation pipeline.
    pipeline_models = (MODELS['t5enro'], MODELS['pegasus'], MODELS['mbartenro'])

    if 'BlackKakapo' in model_name:
        # NOTE(review): the second returned value is bound to model_name here,
        # whereas every other branch binds it to message — confirm what
        # paraphraseTranslateMethod actually returns.
        translation, model_name = src.Translate.paraphraseTranslateMethod(input_text, model_name)
    elif 'Helsinki-NLP' in model_name:
        translator = src.Translate.Translators(model_name, sl, tl, input_text)
        translation, message = translator.HelsinkiNLP_mulroa()
    elif model_name in pipeline_models:
        translator = src.Translate.Translators(model_name, sl, tl, input_text)
        translation, message = translator.translationpipe()
    elif model_name == MODELS['mbart']:
        translator = src.Translate.Translators(model_name, sl, tl, input_text)
        translation, message = translator.mbartlarge()
    else:
        # Any other model id falls through to the direct gemma helper.
        translation = src.Translate.gemma_direct(input_text, model_name)

    return {
        "input_text": input_text,
        "translation": translation,
        "model_name": model_name,
        "message": message,
    }

class Bergamot:
    """Download a Bergamot model from the Hugging Face Hub and translate with it.

    One instance is built per translation request. The translation service and
    the set of already-downloaded models are class attributes, so they are
    shared by all instances for the lifetime of the process.
    """

    # One shared translation service for the whole process.
    config = bergamot.ServiceConfig(numWorkers=4)
    service = bergamot.Service(config)
    # Local model folders ("{sl}{tl}/{model_name}") whose files were already
    # downloaded by this process. Kept on the class: a per-instance set would
    # start empty for every request and never skip a download.
    installed_pairs: set = set()

    def __init__(self, input_text, sl, tl, model_name):
        """Store the request parameters and list the model repository.

        Parameters:
            input_text (list[str]): Texts to translate.
            sl (str): Source language code.
            tl (str): Target language code.
            model_name (str): Repository sub-folder of the model, e.g. 'base/deen'.
        """
        self.input_text, self.sl, self.tl, self.model_name = input_text, sl, tl, model_name
        self.repo_id = "TiberiuCristianLeon/Bergamot"
        self.branches = ['base', 'base-memory', 'tiny']
        self.subfolder = f"{sl}{tl}"
        self.localfolder = f"{self.subfolder}/{model_name}"
        # The repository listing is only needed to download a model, so skip
        # the network round-trip when this model is already installed.
        if self.localfolder in self.installed_pairs:
            self.all_files = []
        else:
            self.all_files = list_repo_files(self.repo_id, repo_type='model')

    def downloadbergamotfiles(self):
        """Download every repository file under ``model_name`` into ``localfolder``.

        Does nothing when the model is already recorded in ``installed_pairs``.
        Download errors are printed, not raised; the model is recorded as
        installed only when all files (including config.yml) were fetched.
        """
        if self.localfolder in self.installed_pairs:
            return
        print('input text type:', type(self.input_text), len(self.all_files), 'installed_pairs', self.installed_pairs, 'defaultlocalfolder', self.localfolder)
        # Match on "model_name/" so a model name that is only a prefix of another
        # folder name is not picked up, and strip that prefix to obtain the
        # path relative to the model folder.
        prefix = f"{self.model_name}/"
        known_branches = tuple(self.branches)
        model_files = [
            f[len(prefix):]
            for f in self.all_files
            if f.startswith(prefix) and f.startswith(known_branches)
        ]
        print('model_files', model_files)
        try:
            for file_path in model_files:
                local_path = hf_hub_download(repo_id=self.repo_id, subfolder=self.model_name, filename=file_path, local_dir=self.subfolder)
                print(f"Downloaded to: {local_path}") # Downloaded to: deen/base/deen/config.yml
        except Exception as downloaderror:
            print(f"Error downloading {self.model_name}: {downloaderror}.")
            return
        if 'config.yml' in model_files:
            print('Add to set after download', self.localfolder)
            self.installed_pairs.add(self.localfolder)
        else:
            print('installed_pairs', self.installed_pairs, 'localfolder', self.localfolder, 'config.yml not found in repository listing')

    def translate(self):
        """Ensure the model is available locally, then translate ``input_text``.

        Returns:
            dict: ``input`` (the texts as received), ``translated_text`` (a
            string for a single input, a list for several, or the error text
            on failure) and ``message_text`` (a summary or error description).
        """
        from pathlib import Path

        self.downloadbergamotfiles()
        config_path = f"{self.localfolder}/config.yml"
        try:
            # Fail with a clear message rather than handing a missing path to
            # the native model loader (its behaviour on a missing file is not
            # visible from here).
            if not Path(config_path).is_file():
                raise FileNotFoundError(f"Model configuration not found: {config_path}")
            model = self.service.modelFromConfigPath(config_path)
            options = bergamot.ResponseOptions(alignment=False, sentenceMappings=False, qualityScores=False, HTML=False)
            rawresponse = self.service.translate(model, bergamot.VectorString(self.input_text), options)
            # Several inputs -> list of translations; a single input -> plain string.
            response: list|str = [r.target.text for r in rawresponse] if len(rawresponse) > 1 else next(iter(rawresponse)).target.text
            print(type(self.input_text), len(self.input_text), len(rawresponse), type(response), response)
            message_text = f"Translated from {self.sl} to {self.tl} with {self.model_name}."
        except Exception as translateerror:
            response, message_text = str(translateerror), f"Error translating from {self.sl} to {self.tl} with {self.model_name}: {translateerror}."
            print(translateerror)
        return {"input": self.input_text, "translated_text": response, "message_text": message_text}

# Example: https://tiberiucristianleon-fastapimt.hf.space/bergamott?input_text=das%20ist%20keine%20gute%20Frau&input_text=das%20ist%20eine%20gute%20Nachricht&sl=de&tl=en&model_name=base/deen
@app.get("/bergamott", operation_id="get_bergamott", description="Translate text with Bergamot", tags=["bergamott"], summary="Translate text with Bergamot")
def bergamott(input_text: list[str] = Query(description="Input string or list of strings"), sl: str = 'de', tl: str = 'en', model_name: Optional[str] = 'base/deen'):
    """
    Translates the input text from the source language to the target language using a specified model.
    Parameters:
        input_text (list[str]): The source text(s) to be translated; repeat the query parameter to send several
        sl (str): The source language of the input text
        tl (str): The target language into which the input text is translated
        model_name (str): The selected translation model name
    Returns:
        dict:
            input (list[str]): The input text(s) in the source language
            translated_text (str | list[str]): The translation (a string for one input, a list for several), or the error text on failure
            message_text (str): A descriptive message summarizing the translation process, or describing the error

    Example:
        GET /bergamott?input_text=Hello%20world&sl=en&tl=de&model_name=base/ende
        {"input": ["Hello world"], "translated_text": "Hallo Welt", "message_text": "Translated from en to de with base/ende."}
    """
    try:
        bergamotinstance = Bergamot(input_text, sl, tl, model_name)
        return bergamotinstance.translate()
    except Exception as mainerror:
        # Construction failed (e.g. listing the model repository). Return the
        # same payload shape as a translation result; without this return the
        # endpoint would answer with null.
        response, message_text = str(mainerror), f"Error translating from {sl} to {tl} with {model_name}: {mainerror}."
        print(mainerror)
        return {"input": input_text, "translated_text": response, "message_text": message_text}

# Example: https://tiberiucristianleon-fastapimt.hf.space/bergamots?input_text=das%20ist%20keine%20gute%20Frau&input_text=das%20ist%20eine%20gute%20Nachricht&sl=de&tl=en&model_name=base/deen
# Local model folders ("{sl}{tl}/{model_name}") whose files were already
# downloaded by this process; lets repeated requests skip the download step.
installed_pairs: set[str] = set()

@app.get("/bergamots", operation_id="get_bergamot", description="Translate text with Bergamot", tags=["bergamots"], summary="Translate text with Bergamot")
def bergamots(input_text: list[str] = Query(description="Input string or list of strings"), sl: str = 'de', tl: str = 'en', model_name: Optional[str] = 'base/deen'):
    """
    Translates the input text from the source language to the target language using a specified model.
    Parameters:
        input_text (list[str]): The source text(s) to be translated; repeat the query parameter to send several
        sl (str): The source language of the input text
        tl (str): The target language into which the input text is translated
        model_name (str): The selected translation model name
    Returns:
        dict:
            input (list[str]): The input text(s) in the source language
            translated_text (str | list[str]): The translation (a string for one input, a list for several), or the error text on failure
            message_text (str): A descriptive message summarizing the translation process, or describing the error

    Example:
        GET /bergamots?input_text=Hello%20world&sl=en&tl=de&model_name=base/ende
        {"input": ["Hello world"], "translated_text": "Hallo Welt", "message_text": "Translated from en to de with base/ende."}
    """
    try:
        import bergamot
        repo_id = "TiberiuCristianLeon/Bergamot"
        branches = ('base', 'base-memory', 'tiny')
        config = bergamot.ServiceConfig(numWorkers=4)
        service = bergamot.Service(config)
        subfolder = f"{sl}{tl}"
        localfolder = f"{subfolder}/{model_name}"
        if localfolder not in installed_pairs:
            # List all files in the repo, then keep those under "model_name/"
            # (relative to that folder) that belong to a known branch.
            all_files = list_repo_files(repo_id, repo_type='model')
            print('input text type:', type(input_text), len(all_files), 'installed_pairs', installed_pairs, 'defaultlocalfolder', localfolder)
            prefix = f"{model_name}/"
            model_files = [
                f[len(prefix):]
                for f in all_files
                if f.startswith(prefix) and f.startswith(branches)
            ]
            print('model_files', model_files)
            for file_path in model_files:
                local_path = hf_hub_download(repo_id=repo_id, subfolder=model_name, filename=file_path, local_dir=subfolder)
                print(f"Downloaded to: {local_path}") # Downloaded to: deen/base/deen/config.yml
            # Record the model only once every file, including its config, was fetched.
            if 'config.yml' in model_files:
                print('Add to set after download', localfolder)
                installed_pairs.add(localfolder)
        model = service.modelFromConfigPath(f"{localfolder}/config.yml")
        options = bergamot.ResponseOptions(alignment=False, sentenceMappings=False, qualityScores=False, HTML=False)
        # rawresponse is an iterable of per-input responses.
        rawresponse = service.translate(model, bergamot.VectorString(input_text), options)
        # Several inputs -> list of translations; a single input -> plain string.
        response: list|str = [r.target.text for r in rawresponse] if len(rawresponse) > 1 else next(iter(rawresponse)).target.text
        print(type(input_text), len(input_text), len(rawresponse), type(response), response)
        message_text = f"Translated from {sl} to {tl} with {model_name}."
    except Exception as generalerror:
        response, message_text = str(generalerror), f"Error translating from {sl} to {tl} with {model_name}: {generalerror}."
        print(generalerror)
    return {"input": input_text, "translated_text": response, "message_text": message_text}

# Holds at most one loaded embedding model, keyed by model id, so consecutive
# requests for the same model do not reload it while memory stays bounded to
# a single model.
_EMBED_MODEL_CACHE: dict = {}

@app.get("/embed", operation_id="get_embeddings", description="Embed text", tags=["embed"], summary="Embed text")
def embed(text: str, model: str  = EMBEDDING_MODEL):
    """Compute a sentence embedding for ``text``.

    Parameters:
        text (str): The text to embed.
        model (str): Sentence-transformers model id; defaults to EMBEDDING_MODEL.
    Returns:
        dict: ``input`` (the text), ``embeddings`` (the vector as a list) and
        ``shape`` (the shape of the encoded array).
    """
    encoder = _EMBED_MODEL_CACHE.get(model)
    if encoder is None:
        encoder = SentenceTransformer(model)
        # Replace whatever was cached: only the most recently used model is kept.
        _EMBED_MODEL_CACHE.clear()
        _EMBED_MODEL_CACHE[model] = encoder
    embeddings = encoder.encode(text)
    print(embeddings.shape, len(embeddings))
    # similarities = encoder.similarity(embeddings, embeddings)
    return {"input": text, "embeddings": embeddings.tolist(), "shape": embeddings.shape}

# Create an MCP server based on this app.
# It is constructed after the route definitions above, and is limited to the
# listed operation ids and tags: of the tagged routes in this file,
# "listmodels", "bergamots" and "embed" are not named in either filter.
# NOTE(review): both include_operations and include_tags are supplied; confirm
# in the fastapi_mcp documentation how the two filters combine.
mcp = FastApiMCP(
    app,
    name="Translate and paraphrase FASTAPI MCP",
    description="MCP server to translate and paraphrase text",
    describe_all_responses=True,
    describe_full_response_schema=True,
    include_operations=["get_translate", "get_paraphrase"],
    include_tags=["paraphrase", "translate", "bergamott"]
)
# Mount the MCP server directly to the FASTAPI app
mcp.mount()