from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Use the generic HuggingFaceEmbeddings wrapper for the smaller embedding model
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
# No BitsAndBytesConfig import needed; the model loads unquantized on CPU
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os
from dotenv import load_dotenv

load_dotenv()

# Set cache directories, falling back to /tmp on hosts with restricted permissions
os.environ.setdefault('HF_HOME', '/tmp/huggingface_cache')
os.environ.setdefault('TRANSFORMERS_CACHE', '/tmp/huggingface_cache/transformers')
os.environ.setdefault('HF_DATASETS_CACHE', '/tmp/huggingface_cache/datasets')
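# Note: recent transformers releases read HF_HOME and warn that
# TRANSFORMERS_CACHE is deprecated; it is kept here for older versions.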

# --- MODEL INITIALIZATION (minimal footprint) ---
print("Loading Qwen2-0.5B-Instruct...")
model_name = "Qwen/Qwen2-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Full-precision CPU load: at 0.5B parameters the model is small enough
# that bitsandbytes 8-bit quantization is unnecessary
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    trust_remote_code=True,
)

llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.5,
    top_p=0.9,
)
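# Note: by default the text-generation pipeline returns the prompt plus the
# completion; passing return_full_text=False above would make it emit only the
# newly generated text, removing the need to strip the prompt manually below.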
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Use the lightweight all-MiniLM-L6-v2 sentence-transformers embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# --- DOCUMENT LOADING & CHUNKING ---
loader = PyPDFLoader("data/sample.pdf")  # path as mounted inside the Docker container
documents = loader.load()

# 1000-character chunks with 200 characters of overlap preserve context
# across chunk boundaries
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
if not chunks:
    raise ValueError("No document chunks found.")

# Build the FAISS index and expose a retriever over it
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever()
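# The default retriever returns the top 4 chunks; if answers drift off-topic,
# a tighter cutoff is one option, e.g.:
#   retriever = vectorstore.as_retriever(search_kwargs={"k": 2})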

# Exposed for rag.py to import
def query_vector_store(query: str) -> str | None:
    # retriever.invoke() supersedes the deprecated get_relevant_documents()
    docs = retriever.invoke(query)
    if not docs:
        return None
    context = "\n\n".join(doc.page_content for doc in docs)
    prompt = (
        "Use the following context to answer the question:\n\n"
        f"{context}\n\nQuestion: {query}\nAnswer:"
    )
    raw_output = llm.invoke(prompt)
    # The pipeline echoes the prompt, so strip it to leave just the answer
    return raw_output.replace(prompt, "").strip()
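
# Minimal smoke-test sketch (assumes data/sample.pdf exists; the question
# below is purely illustrative):
if __name__ == "__main__":
    answer = query_vector_store("What is this document about?")
    print(answer or "No relevant context found.")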