Spaces:
Sleeping
Sleeping
File size: 626 Bytes
5fc69e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import asyncio

import torch
from fastapi import Request
async def generate_fallback_response(request: Request, prompt: str) -> str:
    """Generate a reply to *prompt* with the application's fallback model.

    The tokenizer and model are read from ``request.app.state``
    (``fallback_tokenizer`` and ``fallback_model``).

    Args:
        request: Incoming request; only ``request.app.state`` is used.
        prompt: Text the model should continue.

    Returns:
        The newly generated text with surrounding whitespace removed, or
        ``"..."`` when the model produced no visible text.
    """
    tokenizer = request.app.state.fallback_tokenizer
    model = request.app.state.fallback_model

    def _generate() -> str:
        """Run the blocking tokenize/generate/decode pipeline."""
        inputs = tokenizer(prompt, return_tensors="pt")
        prompt_length = inputs["input_ids"].shape[-1]

        # Grad mode is thread-local, so no_grad() has to be entered in the
        # thread that actually runs the forward passes.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                do_sample=True,
            )

        # Drop the prompt by token count rather than by character count: the
        # decoded prompt is not guaranteed to match the input string exactly
        # (whitespace/Unicode normalisation, special tokens), so slicing the
        # decoded text by len(prompt) can cut the reply in the wrong place.
        # NOTE(review): assumes a decoder-only model whose output starts with
        # the prompt tokens, as the original prompt-stripping implied.
        new_tokens = outputs[0][prompt_length:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Generation is slow and synchronous; running it in the default executor
    # keeps the event loop free to serve other requests meanwhile.
    loop = asyncio.get_running_loop()
    reply = await loop.run_in_executor(None, _generate)
    return reply or "..."