import gc

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
available_models = {
    "Llama 3.2": "unsloth/Llama-3.2-3B-Instruct",
    "Microsoft Phi-4 Mini": "microsoft/Phi-4-mini-instruct",
    # Note: GGUF repositories generally cannot be loaded by AutoModelForCausalLM
    # without an explicit gguf_file argument; a standard (non-GGUF) checkpoint
    # is usually the safer choice here.
    "Google Gemma 3": "unsloth/gemma-3-4b-it-GGUF",
}
# --- Global State (or use gr.State in Blocks) ---
# Keeps track of the currently loaded model/pipeline.
current_model_id = None
current_pipeline = None

print(f"Models available for selection: {list(available_models.keys())}")
# Define a function to load/switch models
def load_llm_model(model_name):
    """Loads the selected LLM, unloading the previous one."""
    global current_model_id, current_pipeline, tokenizer, model

    new_model_id = available_models.get(model_name)
    if not new_model_id:
        return "Invalid model selected.", None  # Error message and no pipeline

    if new_model_id == current_model_id and current_pipeline is not None:
        print(f"Model {model_name} is already loaded.")
        # Indicate success but don't reload
        return f"{model_name} already loaded.", current_pipeline

    print(f"Switching to model: {model_name} ({new_model_id})...")

    # Unload the previous model (important for memory):
    # drop references, collect garbage, then clear the CUDA cache.
    current_pipeline = None
    if "model" in globals():      # globals(), not locals(): these names live at module level
        del model
    if "tokenizer" in globals():
        del tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Clear GPU memory cache
    print("Previous model unloaded (if any).")

    # --- Load the new model ---
    loading_message = f"Loading {model_name}..."
    print(loading_message)
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(new_model_id, trust_remote_code=True)

        # Load model (4-bit quantized via bitsandbytes)
        model = AutoModelForCausalLM.from_pretrained(
            new_model_id,
            torch_dtype="auto",  # or torch.float16 / torch.bfloat16 explicitly
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map="auto",
            trust_remote_code=True,
        )

        # Create the pipeline (model and tokenizer are already loaded onto devices)
        loaded_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        print(f"Model {model_name} loaded successfully!")
        current_model_id = new_model_id
        current_pipeline = loaded_pipeline  # Update global state
        # Returning the pipeline lets a gr.State-based app store it (see sketch at the end of this file)
        return f"{model_name} loaded successfully!", loaded_pipeline  # Status message and the pipeline object
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        current_model_id = None
        current_pipeline = None
        return f"Error loading {model_name}: {e}", None  # Error message and no pipeline
# --- Function to handle Q&A submission ---
# This function relies on the globally managed `current_pipeline`.
# In a more robust Gradio app you would pass the pipeline via gr.State
# (see the sketch at the end of this file).
def handle_submit(question):
    """Handles the user submitting a question."""
    if not current_pipeline:
        return "Error: No model is currently loaded. Please select a model."
    if not pdf_text:
        return "Error: PDF text is not loaded. Please run Section 4."
    if not question:
        return "Please enter a question."
    print(f"Handling submission for question: '{question}' using {current_model_id}")
    # Call the Q&A function defined in Section 5
    answer = answer_question_from_pdf(pdf_text, question, current_pipeline)
    return answer
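
# --- Hypothetical fallbacks for the Section 4/5 objects (sketch only) ---
# Everything below assumes `pdf_text` and `pdf_filename` from Section 4 and
# `answer_question_from_pdf` from Section 5 already exist. The guarded stubs
# here are only a convenience for running this file in isolation; in the full
# tutorial they are skipped because the real objects are already defined. The
# stub Q&A helper is an assumption about the Section 5 signature, not the
# actual implementation.
if "pdf_text" not in globals():
    pdf_filename = "example.pdf"  # hypothetical placeholder name
    pdf_text = ""                 # empty -> handle_submit will ask you to run Section 4

if "answer_question_from_pdf" not in globals():
    def answer_question_from_pdf(context_text, question, llm_pipeline):
        """Hypothetical stand-in: put the PDF text and the question into one prompt."""
        prompt = f"Context:\n{context_text[:4000]}\n\nQuestion: {question}\nAnswer:"
        result = llm_pipeline(prompt, max_new_tokens=256, do_sample=False)
        # text-generation pipelines return the prompt plus the completion by default
        return result[0]["generated_text"][len(prompt):].strip()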
# --- Build Gradio interface using Blocks ---
print("Building Gradio interface...")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""
# PDF Q&A Bot Using Hugging Face Open-Source Models
Ask questions about the document ('{pdf_filename}' if loaded, {len(pdf_text)} chars).
Select an open-source LLM to answer your question.
**Note:** Switching models takes time, as the new model needs to be downloaded and loaded onto the GPU.
"""
    )
    # Store the pipeline in Gradio state for better practice (optional for this simple version)
    # llm_pipeline_state = gr.State(None)

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],  # Default to the first model
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)

    question_textbox = gr.Textbox(
        label="❓ Your Question", lines=2, placeholder="Enter your question about the document here..."
    )
    submit_button = gr.Button("Submit Question", variant="primary")
    answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)
    # --- Event Handlers ---
    # When the dropdown changes, load the selected model.
    # load_llm_model returns (status_message, pipeline); with only the status
    # textbox as output we keep just the message. With gr.State you would
    # return both (see the sketch at the end of this file).
    model_dropdown.change(
        fn=lambda name: load_llm_model(name)[0],
        inputs=[model_dropdown],
        outputs=[status_textbox],
        # fn=load_llm_model, outputs=[status_textbox, llm_pipeline_state]  # if using gr.State
    )

    # When the button is clicked, call the submit handler
    submit_button.click(
        fn=handle_submit,
        inputs=[question_textbox],
        outputs=[answer_textbox],
        # inputs=[question_textbox, llm_pipeline_state],  # Pass state if using it
    )
# --- Initial Model Load ---
# Simpler approach: load the default model manually *before* launching Gradio.
initial_model_name = list(available_models.keys())[0]
print(f"Performing initial load of default model: {initial_model_name}...")
status, _ = load_llm_model(initial_model_name)
status_textbox.value = status  # Set the initial status shown in the UI
print("Initial load complete.")
# --- Launch the Gradio App ---
print("Launching Gradio demo...")
demo.launch(debug=True)  # debug=True provides more detailed logs
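
# --- Alternative wiring with gr.State (sketch, not used above) ---
# The comments above repeatedly mention passing the pipeline through gr.State
# instead of module-level globals. This is a minimal, hedged sketch of that
# pattern; it reuses load_llm_model()/answer_question_from_pdf() from above,
# is never called here, and is shown only to illustrate the wiring.
def build_demo_with_state():
    """Same UI, but the loaded pipeline travels through gr.State."""
    with gr.Blocks(theme=gr.themes.Soft()) as state_demo:
        llm_pipeline_state = gr.State(None)  # holds the currently loaded pipeline

        model_dropdown = gr.Dropdown(
            choices=list(available_models.keys()),
            label="🤖 Select LLM Model",
            value=list(available_models.keys())[0],
        )
        status_textbox = gr.Textbox(label="Model Status", interactive=False)
        question_textbox = gr.Textbox(label="❓ Your Question", lines=2)
        submit_button = gr.Button("Submit Question", variant="primary")
        answer_textbox = gr.Textbox(label="💡 Answer", lines=5, interactive=False)

        # load_llm_model already returns (status_message, pipeline), which maps
        # directly onto [status_textbox, llm_pipeline_state].
        model_dropdown.change(
            fn=load_llm_model,
            inputs=[model_dropdown],
            outputs=[status_textbox, llm_pipeline_state],
        )

        def handle_submit_with_state(question, llm_pipeline):
            if llm_pipeline is None:
                return "Error: No model is currently loaded. Please select a model."
            if not question:
                return "Please enter a question."
            return answer_question_from_pdf(pdf_text, question, llm_pipeline)

        submit_button.click(
            fn=handle_submit_with_state,
            inputs=[question_textbox, llm_pipeline_state],
            outputs=[answer_textbox],
        )

        # Optional: perform the initial load at app startup instead of before launch.
        state_demo.load(
            fn=load_llm_model,
            inputs=[model_dropdown],
            outputs=[status_textbox, llm_pipeline_state],
        )
    return state_demo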