```python
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Replaces the deprecated `from langchain.llms import HuggingFacePipeline`
# (pip install langchain-huggingface)
from langchain_huggingface import HuggingFacePipeline
import torch

app = FastAPI()

# --- LLM initialization using Hugging Face ---
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,  # cap generated tokens instead of prompt + output length
    do_sample=True,      # required, otherwise `temperature` is ignored
    temperature=0.3,
)

llm = HuggingFacePipeline(pipeline=generator)

# Example endpoint using the new llm.
# Defined as a sync endpoint so FastAPI runs it in a thread pool and the
# blocking generate call doesn't stall the event loop.
@app.post("/query")
def post_query(query: str):
    # Create a simple prompt structure
    prompt = f"Answer the following query:\n\n{query}\n"
    # Get the response from the LLM (invoke() is the current LangChain API;
    # calling llm(prompt) directly is deprecated)
    response = llm.invoke(prompt)
    return {"response": response}

# (Keep your WebSocket endpoint and other code mostly unchanged)
```
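To sanity-check the endpoint before wiring it into the rest of your app, something like the sketch below should work. It assumes the code above is saved as `main.py` and that you kept the illustrative `/query` route path; `TestClient` needs `httpx` installed.

```python
# Minimal smoke test using FastAPI's built-in test client (pip install httpx).
from fastapi.testclient import TestClient

from main import app  # assumes the snippet above lives in main.py

client = TestClient(app)

# `query` is a plain str parameter on the endpoint, so FastAPI reads it
# from the query string rather than the request body.
resp = client.post("/query", params={"query": "What is LangChain?"})
print(resp.json())  # e.g. {"response": "..."}
```

Note that the first request will be slow while the model loads and generates on whatever device `device_map="auto"` picked.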