# pdf-QA-app / app.py
import gc

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
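# Assumed dependencies (a sketch of the requirements.txt for this Space; the
# exact packages and pins are not in the original source):
#   gradio
#   torch
#   transformers
#   accelerate      # needed for device_map="auto"
#   bitsandbytes    # needed for 4-bit quantization
#   pypdf           # used by the PDF-loading sketch below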
available_models = {
    "Llama 3.2": "unsloth/Llama-3.2-3B-Instruct",
    "Microsoft Phi-4 Mini": "microsoft/Phi-4-mini-instruct",
    # Note: GGUF repos need extra handling (e.g. the `gguf_file` argument to
    # from_pretrained, or a llama.cpp backend); a plain from_pretrained on
    # this ID may fail.
    "Google Gemma 3": "unsloth/gemma-3-4b-it-GGUF",
}
# --- Global State (or use gr.State in Blocks) ---
# Track the currently loaded model/pipeline so only one lives in memory.
current_model_id = None
current_pipeline = None
tokenizer = None
model = None
print(f"Models available for selection: {list(available_models.keys())}")
# --- Load / switch models ---
def load_llm_model(model_name):
    """Load the selected LLM, unloading the previous one first."""
    global current_model_id, current_pipeline, tokenizer, model
    new_model_id = available_models.get(model_name)
    if not new_model_id:
        return "Invalid model selected.", None  # Error message, no pipeline
    if new_model_id == current_model_id and current_pipeline is not None:
        print(f"Model {model_name} is already loaded.")
        # Indicate success but don't reload
        return f"{model_name} already loaded.", current_pipeline
print(f"Switching to model: {model_name} ({new_model_id})...")
# Unload previous model (important for memory)
# Clear variables and run garbage collection
current_pipeline = None
if "model" in locals():
del model
if "tokenizer" in locals():
del tokenizer
if "pipe" in locals():
del pipe
torch.cuda.empty_cache() # Clear GPU memory cache
import gc
gc.collect()
print("Previous model unloaded (if any).")
# --- Load the new model ---
loading_message = f"Loading {model_name}..."
try:
# Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(new_model_id, trust_remote_code = True)
# Load Model (Quantized)
model = AutoModelForCausalLM.from_pretrained(new_model_id,
torch_dtype = "auto", # "torch.float16", # Or bfloat16 if available
load_in_4bit = True,
device_map = "auto",
trust_remote_code = True)
# Create Pipeline
loaded_pipeline = pipeline(
"text-generation", model = model, tokenizer = tokenizer, torch_dtype = "auto", device_map = "auto")
print(f"Model {model_name} loaded successfully!")
current_model_id = new_model_id
current_pipeline = loaded_pipeline # Update global state
# Use locals() or return values with gr.State for better Gradio practice
return f"{model_name} loaded successfully!", loaded_pipeline # Status message and the pipeline object
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        current_model_id = None
        current_pipeline = None
        return f"Error loading {model_name}: {e}", None  # Error message, no pipeline
# --- Function to handle Q&A submission ---
# This function relies on the globally managed `current_pipeline`.
# In a more robust Gradio app, you'd pass the pipeline via gr.State.
def handle_submit(question):
    """Handle the user submitting a question."""
    if not current_pipeline:
        return "Error: No model is currently loaded. Please select a model."
    if not pdf_text:
        return "Error: PDF text is not loaded. Please run Section 4."
    if not question:
        return "Please enter a question."
    print(f"Handling submission for question: '{question}' using {current_model_id}")
    # Call the Q&A function defined in Section 5
    answer = answer_question_from_pdf(pdf_text, question, current_pipeline)
    return answer
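# Quick smoke test from a Python shell (assumes the PDF text extracted above;
# the question is just an example):
#   load_llm_model("Llama 3.2")
#   print(handle_submit("What is this document about?"))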
# --- Build Gradio interface using Blocks ---
print("Building Gradio interface...")
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""
# PDF Q&A Bot Using Hugging Face Open-Source Models
Ask questions about the document ('{pdf_filename}' if loaded, {len(pdf_text)} chars).
Select an open-source LLM to answer your question.
**Note:** Switching models takes time, since the new model has to be downloaded and loaded onto the GPU.
"""
    )
    # Store the pipeline in Gradio state for better practice (optional here)
    # llm_pipeline_state = gr.State(None)
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],  # Default to the first model
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)
    question_textbox = gr.Textbox(
        label="❓ Your Question", lines=2, placeholder="Enter your question about the document here..."
    )
    submit_button = gr.Button("Submit Question", variant="primary")
    answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)
    # --- Event Handlers ---
    # When the dropdown changes, load the selected model. The loader returns
    # (status, pipeline); only the status string goes to the UI here, since
    # the outputs list has a single component.
    model_dropdown.change(
        fn=lambda name: load_llm_model(name)[0],
        inputs=[model_dropdown],
        outputs=[status_textbox],
        # outputs=[status_textbox, llm_pipeline_state]  # if using gr.State
    )
    # When the button is clicked, call the submit handler
    submit_button.click(
        fn=handle_submit,
        inputs=[question_textbox],
        outputs=[answer_textbox],
        # inputs=[question_textbox, llm_pipeline_state]  # pass state if using it
    )
# --- Initial Model Load ---
# Simplest approach: load the default model *before* launching Gradio, and
# seed the status textbox with the result.
initial_model_name = list(available_models.keys())[0]
print(f"Performing initial load of default model: {initial_model_name}...")
status, _ = load_llm_model(initial_model_name)
status_textbox.value = status  # Initial status shown at render time
print("Initial load complete.")
# --- Launch the Gradio App ---
print("Launching Gradio demo...")
demo.launch(debug=True) # debug=True provides more detailed logs
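# For reference, the gr.State wiring hinted at in the comments above would
# look roughly like this (a sketch, not wired into the app):
#
#   llm_pipeline_state = gr.State(None)
#   model_dropdown.change(
#       fn=load_llm_model,  # returns (status, pipeline)
#       inputs=[model_dropdown],
#       outputs=[status_textbox, llm_pipeline_state],
#   )
#   submit_button.click(
#       fn=lambda q, pipe: answer_question_from_pdf(pdf_text, q, pipe)
#                          if pipe else "Load a model first.",
#       inputs=[question_textbox, llm_pipeline_state],
#       outputs=[answer_textbox],
#   )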