Spaces:

Steph254
/

demo_1

Runtime error

App Files Files Community

Steph254 committed on Mar 18

Commit

b94b847

verified ·

1 Parent(s): 5e6f6cd

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -42

app.py CHANGED Viewed

@@ -1,63 +1,84 @@
 import os
 import gradio as gr
-from transformers import LlamaTokenizer, AutoModelForCausalLM
 import torch
 import json
 # Set Hugging Face Token for Authentication (ensure it's set in your environment)
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-# Load Llama 3.2 (QLoRA) Model on CPU
 MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
-tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, token=HUGGINGFACE_TOKEN)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    token=HUGGINGFACE_TOKEN,
-    device_map="cpu"
-)
-# Load Llama Guard for content moderation on CPU
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
-guard_tokenizer = LlamaTokenizer.from_pretrained(LLAMA_GUARD_NAME, token=HUGGINGFACE_TOKEN)
-guard_model = AutoModelForCausalLM.from_pretrained(
-    LLAMA_GUARD_NAME,
-    token=HUGGINGFACE_TOKEN,
-    device_map="cpu"
-)
 # Define Prompt Templates
 PROMPTS = {
-    "project_analysis": """Analyze this project description and generate:
 1. Project timeline with milestones
 2. Required technology stack
 3. Potential risks
 4. Team composition
 5. Cost estimation
-Project: {project_description}""",
-    "code_generation": """Generate implementation code for this feature:
 {feature_description}
 Considerations:
 - Use {programming_language}
 - Follow {coding_standards}
 - Include error handling
-- Add documentation""",
-    "risk_analysis": """Predict potential risks for this project plan:
 {project_data}
-Format output as JSON with risk types, probabilities, and mitigation strategies"""
 }
 # Function: Content Moderation using Llama Guard
 # Feeds user_input to the module-level guard_tokenizer / guard_model and
 # returns a warning string when the decoded output contains "flagged"
 # (case-insensitive); returns None otherwise.
 def moderate_input(user_input):
     # The raw user text is tokenized as-is and truncated to at most 512 tokens.
-    inputs = guard_tokenizer(user_input, return_tensors="pt", max_length=512, truncation=True)
     # NOTE(review): per the Transformers docs, generate()'s max_length counts
     # prompt tokens too, so a prompt at the 512 limit would leave no room for
     # new tokens - confirm.
-    outputs = guard_model.generate(inputs.input_ids, max_length=512)
     # The whole of outputs[0] is decoded. NOTE(review): for a decoder-only model
     # this presumably includes the prompt, so the user's own text would be part
     # of `response` - confirm.
     response = guard_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if "flagged" in response.lower():
         return "⚠️ Content flagged by Llama Guard. Please modify your input."
     return None  # Safe input, proceed normally
@@ -69,14 +90,16 @@ def generate_response(prompt_type, **kwargs):
     if moderation_warning:
         return moderation_warning  # Stop processing if flagged
-    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
-    outputs = model.generate(
-        inputs.input_ids,
-        max_length=1024,
-        temperature=0.7 if prompt_type == "project_analysis" else 0.5,
-        top_p=0.9
-    )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -92,7 +115,12 @@ def generate_code(feature_desc, lang="Python", standards="PEP8"):
 # Request a "risk_analysis" reply from generate_response and parse the whole
 # reply as JSON. Returns the parsed value, or an error dict when the reply is
 # not valid JSON.
 def predict_risks(project_data):
     risks = generate_response("risk_analysis", project_data=project_data)
     try:
         # json.loads needs the entire reply to be a single JSON document; any
         # surrounding text raises JSONDecodeError.
-        return json.loads(risks)  # Convert to structured JSON if valid
     except json.JSONDecodeError:
         # Also reached when generate_response returned its moderation warning,
         # which is plain text rather than JSON.
         return {"error": "Invalid JSON response. Please refine your input."}
@@ -104,7 +132,7 @@ def create_gradio_interface():
         # Project Analysis Tab
         with gr.Tab("Project Setup"):
             project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
-            project_output = gr.JSON(label="Project Analysis")
             analyze_btn = gr.Button("Analyze Project")
             analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)
@@ -137,14 +165,27 @@ def create_gradio_interface():
                     chat_history.append((message, moderation_warning))
                     return "", chat_history
-                prompt = f"""Project Management Chat:
-                Context: {message}
-                Chat History: {chat_history}
-                User: {message}
-                AI:"""
-                inputs = tokenizer(prompt, return_tensors="pt")
-                outputs = model.generate(inputs.input_ids, max_length=1024)
                 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                 chat_history.append((message, response))
                 return "", chat_history
@@ -157,4 +198,4 @@ def create_gradio_interface():
 # Run Gradio App
 if __name__ == "__main__":
     interface = create_gradio_interface()
-    interface.launch(share=True)

 import os
 import gradio as gr
 import torch
 import json
+from transformers import AutoTokenizer
 # Set Hugging Face Token for Authentication (ensure it's set in your environment)
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+# Function to load Llama model
+def load_llama_model(model_name):
+    from transformers import LlamaForCausalLM, LlamaTokenizer
+    # Use AutoTokenizer which will handle various tokenizer types
+    tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN, use_fast=False)
+    # Use the LlamaForCausalLM class which can properly load the consolidated.00.pth format
+    model = LlamaForCausalLM.from_pretrained(
+        model_name,
+        token=HUGGINGFACE_TOKEN,
+        torch_dtype=torch.float16,  # Use float16 to reduce memory usage on CPU
+        low_cpu_mem_usage=True,     # Optimize for low memory usage
+        device_map="cpu"
+    )
+    return tokenizer, model
+# Load Llama 3.2 model
 MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
+tokenizer, model = load_llama_model(MODEL_NAME)
+# Load Llama Guard for content moderation
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
+guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME)
 # Define Prompt Templates
 # Keys name the prompt type; each value is a template whose {placeholders}
 # (project_description, feature_description, programming_language,
 # coding_standards, project_data) are presumably filled in by
 # generate_response - verify against that function.
 # NOTE(review): confirm that <|prompt|> and <|completion|> are markers the
 # Llama 3.2 tokenizer actually recognizes, and that <|begin_of_text|> is not
 # added a second time by the tokenizer.
 PROMPTS = {
+    "project_analysis": """<|begin_of_text|><|prompt|>Analyze this project description and generate:
 1. Project timeline with milestones
 2. Required technology stack
 3. Potential risks
 4. Team composition
 5. Cost estimation
+Project: {project_description}<|completion|>""",
+    "code_generation": """<|begin_of_text|><|prompt|>Generate implementation code for this feature:
 {feature_description}
 Considerations:
 - Use {programming_language}
 - Follow {coding_standards}
 - Include error handling
+- Add documentation<|completion|>""",
+    "risk_analysis": """<|begin_of_text|><|prompt|>Predict potential risks for this project plan:
 {project_data}
+Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>"""
 }
 # Function: Content Moderation using Llama Guard
 def moderate_input(user_input):
+    # Llama Guard specific prompt format
+    prompt = f"""<|begin_of_text|><|user|>
+Input: {user_input}
+Please verify that this input doesn't violate any content policies.
+<|assistant|>"""
+    inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True)
+    with torch.no_grad():  # Disable gradient calculation for inference
+        outputs = guard_model.generate(
+            inputs.input_ids,
+            max_length=256,
+            temperature=0.1
+        )
     response = guard_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    if "flagged" in response.lower() or "violated" in response.lower() or "policy violation" in response.lower():
         return "⚠️ Content flagged by Llama Guard. Please modify your input."
     return None  # Safe input, proceed normally
     if moderation_warning:
         return moderation_warning  # Stop processing if flagged
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+    with torch.no_grad():  # Disable gradient calculation for inference
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=1024,
+            temperature=0.7 if prompt_type == "project_analysis" else 0.5,
+            top_p=0.9,
+            do_sample=True
+        )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 def predict_risks(project_data):
     risks = generate_response("risk_analysis", project_data=project_data)
     try:
+        # Try to extract JSON part from the response
+        import re
+        json_match = re.search(r'\{.*\}', risks, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group(0))
+        return {"error": "Could not parse JSON response"}
     except json.JSONDecodeError:
         return {"error": "Invalid JSON response. Please refine your input."}
         # Project Analysis Tab
         with gr.Tab("Project Setup"):
             project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
+            project_output = gr.Textbox(label="Project Analysis", lines=15)  # Changed from JSON to Textbox for better formatting
             analyze_btn = gr.Button("Analyze Project")
             analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)
                     chat_history.append((message, moderation_warning))
                     return "", chat_history
+                # Format chat history for context
+                history_text = ""
+                for i, (usr, ai) in enumerate(chat_history[-3:]):  # Use last 3 messages for context
+                    history_text += f"User: {usr}\nAI: {ai}\n"
+                prompt = f"""<|begin_of_text|><|prompt|>Project Management Chat:
+Context: {message}
+Chat History: {history_text}
+User: {message}<|completion|>"""
+                inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+                with torch.no_grad():
+                    outputs = model.generate(
+                        inputs.input_ids,
+                        max_length=1024,
+                        temperature=0.7,
+                        top_p=0.9,
+                        do_sample=True
+                    )
                 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                 chat_history.append((message, response))
                 return "", chat_history
 # Run Gradio App
 if __name__ == "__main__":
     interface = create_gradio_interface()
+    interface.launch(share=True)