from datasets import load_dataset
import numpy as np
import gradio as gr
import chromadb
from transformers import AutoModel, AutoTokenizer, pipeline
import torch
import os
import requests

# Hosted Inference API endpoint for LLaMA 2 (a gated model: the token below must
# belong to an account that has been granted access to it).
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-hf"
headers = {"Authorization": f"Bearer {os.getenv('HF_Token')}"}  # token comes from the Space secret "HF_Token"

def query_llama(prompt):
    """Send a prompt to the hosted Inference API and return the parsed JSON response."""
    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
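
# Hedged sketch (not part of the original app): the helper above returns whatever
# JSON the API sends back, including error payloads (auth failures, model still
# loading, ...). A more defensive variant might look like this; the name
# `query_llama_checked` is hypothetical and the function is not called anywhere.
def query_llama_checked(prompt):
    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()  # raise on HTTP 4xx/5xx
    data = response.json()
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Inference API error: {data['error']}")
    return data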

prompt = "Explain machine learning in simple terms."
response = query_llama(prompt)
print(response)

# NOTE: everything below is currently disabled by wrapping it in a module-level
# string literal; remove the opening and closing ''' quotes to enable the local
# ChromaDB + RAG pipeline.
'''
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./chroma_db")  # stores data persistently on disk
collection = chroma_client.get_or_create_collection(name="wikipedia_docs")
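
# Note (not in the original code): PersistentClient keeps the collection on disk,
# and get_or_create_collection reuses it after a restart, so the ingestion loop
# below runs against an already-populated collection on subsequent launches.
# collection.upsert() could replace add() if overwriting existing IDs is the intent.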

# Load the BAAI embedding model
model_name = "BAAI/bge-base-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def get_embedding(text):
    """Generate an embedding for `text` using BAAI/bge-base-en (CLS-token pooling)."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the CLS token of the single input sequence and return a flat list of floats
    # (the original returned a nested [[...]] list, which breaks the ChromaDB calls
    # below once it is wrapped in another list).
    return outputs.last_hidden_state[0, 0, :].numpy().tolist()
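
# Optional sketch (an assumption, not in the original app): BGE models are usually
# used with L2-normalized embeddings so that similarity search behaves like cosine
# similarity. The hypothetical helper below shows that variant; it is not called.
def get_normalized_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    cls = outputs.last_hidden_state[:, 0, :]              # (1, hidden_dim) CLS vector
    cls = torch.nn.functional.normalize(cls, p=2, dim=1)  # scale to unit length
    return cls[0].tolist()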

# Load LLaMA model locally (Meta LLaMA 2) -- left disabled; the hosted Inference API is used instead
#llama_pipe = pipeline("text-generation", model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf"))

# Load a small subset of wiki40b (the commented-out split takes the first 1,000 rows)
#dataset = load_dataset("wiki40b", "en", split="train[:1000]")
# Extract only the text field
#docs = [d["text"] for d in dataset]
docs = ["Machine learning is a field of AI...", "Neural networks are inspired by the brain..."]
#print("Loaded dataset with", len(docs), "documents.")

# ✅ Step 2: Embed and Store in ChromaDB
for i, doc in enumerate(docs):
    embedding = get_embedding(doc)
    collection.add(ids=[str(i)], embeddings=[embedding], documents=[doc])

print("Stored embeddings in ChromaDB!")

# Store precomputed embeddings in ChromaDB (earlier, commented-out variant)
#for i, (doc, embedding) in enumerate(zip(docs, embeddings)):
#    collection.add(
#        ids=[str(i)],                     # Unique ID for each doc
#        embeddings=[embedding.tolist()],  # Convert numpy array to list
#        documents=[doc]
#    )

# Search function using ChromaDB (earlier, commented-out variant)
#def search_wikipedia(query, top_k=3):
#    query_embedding = embed_model.encode([query]).tolist()
#    results = collection.query(
#        query_embeddings=query_embedding,
#        n_results=top_k,
#    )
#    return "\n\n".join(results["documents"][0])  # Return top results joined as text
#    return results["documents"][0]               # ...or return the raw list

# Search ChromaDB and generate a response.
# Renamed from query_llama so it does not clobber the Inference API helper defined
# above; generation is delegated to that helper, since the local llama_pipe
# pipeline is never loaded.
def rag_query(user_input):
    query_embedding = get_embedding(user_input)
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    if not results["documents"] or not results["documents"][0]:
        return "No relevant documents found."
    context = " ".join(results["documents"][0])
    prompt = f"Using this context, answer the question: {user_input}\nContext: {context}"
    response = query_llama(prompt)  # a list of {"generated_text": ...} on success
    return f"**LLaMA Response:** {response[0]['generated_text']}\n\n**Retrieved Docs:** {context}"

# Gradio Interface
iface = gr.Interface(
    fn=rag_query,  # the RAG handler defined above
    inputs="text",
    outputs="text",
    title="Wikipedia Search RAG",
    description="Enter a query and retrieve relevant Wikipedia passages.",
)

iface.launch()
'''