import gc

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
available_models = {
    "Llama 3.2": "unsloth/Llama-3.2-3B-Instruct",
    "Microsoft Phi-4 Mini": "microsoft/Phi-4-mini-instruct",
    # Note: GGUF repositories generally cannot be loaded by AutoModelForCausalLM
    # without an explicit gguf_file argument; a standard (non-GGUF) checkpoint
    # is usually the safer choice here.
    "Google Gemma 3": "unsloth/gemma-3-4b-it-GGUF",
}
# --- Global State (or use gr.State in Blocks) ---
# Keeps track of the currently loaded model/pipeline.
current_model_id = None
current_pipeline = None

print(f"Models available for selection: {list(available_models.keys())}")
# Define a function to load/switch models
def load_llm_model(model_name):
    """Loads the selected LLM, unloading the previous one."""
    global current_model_id, current_pipeline, tokenizer, model

    new_model_id = available_models.get(model_name)
    if not new_model_id:
        return "Invalid model selected.", None  # Error message and no pipeline

    if new_model_id == current_model_id and current_pipeline is not None:
        print(f"Model {model_name} is already loaded.")
        # Indicate success but don't reload
        return f"{model_name} already loaded.", current_pipeline

    print(f"Switching to model: {model_name} ({new_model_id})...")

    # Unload the previous model (important for memory):
    # drop references, collect garbage, then clear the CUDA cache.
    current_pipeline = None
    if "model" in globals():      # globals(), not locals(): these names live at module level
        del model
    if "tokenizer" in globals():
        del tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Clear GPU memory cache
    print("Previous model unloaded (if any).")

    # --- Load the new model ---
    loading_message = f"Loading {model_name}..."
    print(loading_message)
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(new_model_id, trust_remote_code=True)

        # Load model (4-bit quantized via bitsandbytes)
        model = AutoModelForCausalLM.from_pretrained(
            new_model_id,
            torch_dtype="auto",  # or torch.float16 / torch.bfloat16 explicitly
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map="auto",
            trust_remote_code=True,
        )

        # Create the pipeline (model and tokenizer are already loaded onto devices)
        loaded_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        print(f"Model {model_name} loaded successfully!")
        current_model_id = new_model_id
        current_pipeline = loaded_pipeline  # Update global state
        # Returning the pipeline lets a gr.State-based app store it (see sketch at the end of this file)
        return f"{model_name} loaded successfully!", loaded_pipeline  # Status message and the pipeline object
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        current_model_id = None
        current_pipeline = None
        return f"Error loading {model_name}: {e}", None  # Error message and no pipeline
# --- Function to handle Q&A submission ---
# This function relies on the globally managed `current_pipeline`.
# In a more robust Gradio app you would pass the pipeline via gr.State
# (see the sketch at the end of this file).
def handle_submit(question):
    """Handles the user submitting a question."""
    if not current_pipeline:
        return "Error: No model is currently loaded. Please select a model."
    if not pdf_text:
        return "Error: PDF text is not loaded. Please run Section 4."
    if not question:
        return "Please enter a question."
    print(f"Handling submission for question: '{question}' using {current_model_id}")
    # Call the Q&A function defined in Section 5
    answer = answer_question_from_pdf(pdf_text, question, current_pipeline)
    return answer
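
# --- Hypothetical fallbacks for the Section 4/5 objects (sketch only) ---
# Everything below assumes `pdf_text` and `pdf_filename` from Section 4 and
# `answer_question_from_pdf` from Section 5 already exist. The guarded stubs
# here are only a convenience for running this file in isolation; in the full
# tutorial they are skipped because the real objects are already defined. The
# stub Q&A helper is an assumption about the Section 5 signature, not the
# actual implementation.
if "pdf_text" not in globals():
    pdf_filename = "example.pdf"  # hypothetical placeholder name
    pdf_text = ""                 # empty -> handle_submit will ask you to run Section 4

if "answer_question_from_pdf" not in globals():
    def answer_question_from_pdf(context_text, question, llm_pipeline):
        """Hypothetical stand-in: put the PDF text and the question into one prompt."""
        prompt = f"Context:\n{context_text[:4000]}\n\nQuestion: {question}\nAnswer:"
        result = llm_pipeline(prompt, max_new_tokens=256, do_sample=False)
        # text-generation pipelines return the prompt plus the completion by default
        return result[0]["generated_text"][len(prompt):].strip()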
# --- Build Gradio interface using Blocks ---
print("Building Gradio interface...")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""
# PDF Q&A Bot Using Hugging Face Open-Source Models
Ask questions about the document ('{pdf_filename}' if loaded, {len(pdf_text)} chars).
Select an open-source LLM to answer your question.
**Note:** Switching models takes time, as the new model needs to be downloaded and loaded onto the GPU.
"""
    )
    # Store the pipeline in Gradio state for better practice (optional for this simple version)
    # llm_pipeline_state = gr.State(None)

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],  # Default to the first model
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)

    question_textbox = gr.Textbox(
        label="❓ Your Question", lines=2, placeholder="Enter your question about the document here..."
    )
    submit_button = gr.Button("Submit Question", variant="primary")
    answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)
    # --- Event Handlers ---
    # When the dropdown changes, load the selected model.
    # load_llm_model returns (status_message, pipeline); with only the status
    # textbox as output we keep just the message. With gr.State you would
    # return both (see the sketch at the end of this file).
    model_dropdown.change(
        fn=lambda name: load_llm_model(name)[0],
        inputs=[model_dropdown],
        outputs=[status_textbox],
        # fn=load_llm_model, outputs=[status_textbox, llm_pipeline_state]  # if using gr.State
    )

    # When the button is clicked, call the submit handler
    submit_button.click(
        fn=handle_submit,
        inputs=[question_textbox],
        outputs=[answer_textbox],
        # inputs=[question_textbox, llm_pipeline_state],  # Pass state if using it
    )
# --- Initial Model Load ---
# Simpler approach: load the default model manually *before* launching Gradio.
initial_model_name = list(available_models.keys())[0]
print(f"Performing initial load of default model: {initial_model_name}...")
status, _ = load_llm_model(initial_model_name)
status_textbox.value = status  # Set the initial status shown in the UI
print("Initial load complete.")
# --- Launch the Gradio App ---
print("Launching Gradio demo...")
demo.launch(debug=True)  # debug=True provides more detailed logs
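
# --- Alternative wiring with gr.State (sketch, not used above) ---
# The comments above repeatedly mention passing the pipeline through gr.State
# instead of module-level globals. This is a minimal, hedged sketch of that
# pattern; it reuses load_llm_model()/answer_question_from_pdf() from above,
# is never called here, and is shown only to illustrate the wiring.
def build_demo_with_state():
    """Same UI, but the loaded pipeline travels through gr.State."""
    with gr.Blocks(theme=gr.themes.Soft()) as state_demo:
        llm_pipeline_state = gr.State(None)  # holds the currently loaded pipeline

        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)
        question_textbox = gr.Textbox(label="❓ Your Question", lines=2)
        submit_button = gr.Button("Submit Question", variant="primary")
        answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)

        # load_llm_model already returns (status_message, pipeline), which maps
        # directly onto [status_textbox, llm_pipeline_state].
        model_dropdown.change(
            fn=load_llm_model,
            inputs=[model_dropdown],
            outputs=[status_textbox, llm_pipeline_state],
        )

        def handle_submit_with_state(question, llm_pipeline):
            if llm_pipeline is None:
                return "Error: No model is currently loaded. Please select a model."
            if not question:
                return "Please enter a question."
            return answer_question_from_pdf(pdf_text, question, llm_pipeline)

        submit_button.click(
            fn=handle_submit_with_state,
            inputs=[question_textbox, llm_pipeline_state],
            outputs=[answer_textbox],
        )

        # Optional: perform the initial load at app startup instead of before launch.
        state_demo.load(
            fn=load_llm_model,
            inputs=[model_dropdown],
            outputs=[status_textbox, llm_pipeline_state],
        )
    return state_demo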