Spaces:
Sleeping
Sleeping
| import torch | |
| from fastapi import Request | |
| async def generate_fallback_response(request: Request, prompt: str) -> str: | |
| tokenizer = request.app.state.fallback_tokenizer | |
| model = request.app.state.fallback_model | |
| inputs = tokenizer(prompt, return_tensors="pt") | |
| with torch.no_grad(): | |
| outputs = model.generate( | |
| **inputs, | |
| max_new_tokens=150, | |
| temperature=0.7, | |
| top_p=0.9, | |
| repetition_penalty=1.1, | |
| do_sample=True | |
| ) | |
| decoded = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| return decoded[len(prompt):].strip() or "..." |