Spaces:

FridayMaster
/

CHATBOT1

Sleeping

App Files Files Community

FridayMaster committed on Aug 14, 2024

Commit

f67ae72

verified ·

1 Parent(s): e44a872

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -21

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 import faiss
 import numpy as np
@@ -5,12 +7,13 @@ import openai
 from sentence_transformers import SentenceTransformer
 from nltk.tokenize import sent_tokenize
 import nltk
 # Download the required NLTK data
 nltk.download('punkt')
-nltk.download('punkt_tab')
-# Paths
 faiss_path = "manual_chunked_faiss_index_500.bin"
 manual_path = "ubuntu_manual.txt"
@@ -48,17 +51,19 @@ try:
 except Exception as e:
     raise RuntimeError(f"Failed to load FAISS index: {e}")
-# Load your embedding model
-embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-# OpenAI API key
-openai.api_key = '<REDACTED_OPENAI_API_KEY>'
 # Function to create embeddings
 def embed_text(text_list):
-    embeddings = embedding_model.encode(text_list)
-    print("Embedding shape:", embeddings.shape)  # Debugging: Print shape
-    return np.array(embeddings, dtype=np.float32)
 # Function to retrieve relevant chunks for a user query
 def retrieve_chunks(query, k=5):
@@ -66,45 +71,44 @@ def retrieve_chunks(query, k=5):
     try:
         distances, indices = index.search(query_embedding, k=k)
-        print("Indices:", indices)  # Debugging: Print indices
-        print("Distances:", distances)  # Debugging: Print distances
     except Exception as e:
         raise RuntimeError(f"FAISS search failed: {e}")
     if len(indices[0]) == 0:
         return []
-    # Ensure indices are within bounds
     valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
     if not valid_indices:
         return []
-    # Retrieve relevant chunks
     relevant_chunks = [manual_chunks[i] for i in valid_indices]
     return relevant_chunks
 # Function to truncate long inputs
 def truncate_input(text, max_length=512):
-    tokens = generator_tokenizer.encode(text, truncation=True, max_length=max_length, return_tensors="pt")
-    return tokens
 # Function to perform RAG: Retrieve chunks and generate a response
 def rag_response(query, k=5, max_new_tokens=150):
     try:
-        # Step 1: Retrieve relevant chunks
         relevant_chunks = retrieve_chunks(query, k=k)
         if not relevant_chunks:
             return "Sorry, I couldn't find relevant information."
-        # Step 2: Combine the query with retrieved chunks
         augmented_input = query + "\n" + "\n".join(relevant_chunks)
-        # Truncate and encode the input
         inputs = truncate_input(augmented_input)
         # Generate response
-        outputs = generator_model.generate(inputs, max_new_tokens=max_new_tokens)
         generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
         return generated_text
@@ -128,4 +132,3 @@ if __name__ == "__main__":

+# OpenAI API key
 import gradio as gr
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from nltk.tokenize import sent_tokenize
 import nltk
+from transformers import AutoTokenizer, AutoModel
+import torch
 # Download the required NLTK data
 nltk.download('punkt')
+# Paths to your files
 faiss_path = "manual_chunked_faiss_index_500.bin"
 manual_path = "ubuntu_manual.txt"
 except Exception as e:
     raise RuntimeError(f"Failed to load FAISS index: {e}")
+# Load the tokenizer and model for embeddings
+embedding_tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
+embedding_model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
 # Function to create embeddings
 def embed_text(text_list):
+    inputs = embedding_tokenizer(text_list, padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        outputs = embedding_model(**inputs)
+    embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()  # Use the CLS token representation
+    return embeddings
 # Function to retrieve relevant chunks for a user query
 def retrieve_chunks(query, k=5):
     try:
         distances, indices = index.search(query_embedding, k=k)
+        print("Distances:", distances)
+        print("Indices:", indices)
     except Exception as e:
         raise RuntimeError(f"FAISS search failed: {e}")
     if len(indices[0]) == 0:
         return []
     valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
     if not valid_indices:
         return []
     relevant_chunks = [manual_chunks[i] for i in valid_indices]
     return relevant_chunks
+# Load the tokenizer and model for generation
+generator_tokenizer = AutoTokenizer.from_pretrained("gpt-3.5-turbo")  # Replace with correct tokenizer if needed
+generator_model = AutoModel.from_pretrained("gpt-3.5-turbo")  # Replace with correct model if needed
 # Function to truncate long inputs
 def truncate_input(text, max_length=512):
+    inputs = generator_tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
+    return inputs
 # Function to perform RAG: Retrieve chunks and generate a response
 def rag_response(query, k=5, max_new_tokens=150):
     try:
         relevant_chunks = retrieve_chunks(query, k=k)
         if not relevant_chunks:
             return "Sorry, I couldn't find relevant information."
         augmented_input = query + "\n" + "\n".join(relevant_chunks)
         inputs = truncate_input(augmented_input)
         # Generate response
+        outputs = generator_model.generate(inputs['input_ids'], max_new_tokens=max_new_tokens)
         generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
         return generated_text