Spaces:

trungnd7112004
/

FastAPI-backend-chatbotRAG

Running

File size: 2,872 Bytes

ee00031

from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
load_dotenv()
# load model from HuggingFace
# def load_model(model_name="context-labs/meta-llama-Llama-3.2-3B-Instruct-FP16"):
#     ## load model and tokenizer
#     # Configure quantization for memory efficiency
#     quantization_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_use_double_quant=True,
#         bnb_4bit_compute_dtype=torch.float16,
#     )
#
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, device_map="auto")
#
#     print("✅ Model and tokenizer loaded")
#     return model, tokenizer

# -----------------------------
# Prompt Template
# -----------------------------

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are an experienced assistant specializing in question-answering tasks.
    Utilize the provided context to respond to the question.

    Rules:
      - If the question refers to a **specific table**, note that tables may be identified by either:
        • Roman numerals (I, II, III, IV, …)
        • Arabic numerals (1, 2, 3, 4, …)
      - Normalize references (e.g., "Table II" = "Table 2"). Always check both forms when matching.
      - Only answer using information contained in that table.
      - If the table is not found or the requested information is not in the table, respond with: "I don't know."

      - If the question is about a **formula**:
        • Extract the formula from the context (in LaTeX).
        • Present it in a clean readable way:
            - Use a block math display for clarity: $$ ... $$
            - Then rewrite it inline in plain text (e.g., f_final^t = β·f_adapter^t + (1 - β)·f_original^t).
        • Briefly explain what each symbol means if the context provides that information.
        • If the formula is not found, respond with: "I don't know."

      - If the question is not about a table or a formula, answer using the context as normal.
      - Never provide an answer you are unsure about.
      - Keep answers concise, factual, and easy for non-experts to read.

    CONTEXT:
    {context}

    QUESTION: {question}

    DETAILED RESPONSE:
    """,
    input_variable=["context", "question"]
)

#call api groq
llm = ChatGroq(
api_key=os.environ.get("GROQ_API_KEY"),
model="meta-llama/llama-4-scout-17b-16e-instruct",
temperature=0.3,
max_tokens=1024
)

print("✅ Using Groq LLM")

# Function to ask a question
def ask_question(question, context):
    final_prompt = prompt.invoke({"context": context, "question": question})
    answer = llm.invoke(final_prompt)
    return answer.content