Daedalus-1

Sleeping

App Files Files Community

Spestly committed on Aug 31

Commit

2c56220

verified ·

1 Parent(s): 330c803

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -30

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import gradio as gr
 import spaces
-from transformers import pipeline
 import torch
 from typing import List, Dict, Optional
 # Global variable to store pipelines
 model_cache = {}
 # Available models (only Daedalus)
 AVAILABLE_MODELS = {
@@ -14,7 +15,7 @@ AVAILABLE_MODELS = {
 @spaces.GPU
 def initialize_model(model_name):
-    global model_cache
     if model_name not in AVAILABLE_MODELS:
         raise ValueError(f"Model {model_name} not found in available models")
@@ -24,31 +25,80 @@ def initialize_model(model_name):
     # Check if model is already cached
     if model_id not in model_cache:
         try:
             model_cache[model_id] = pipeline(
                 "text-generation",
                 model=model_id,
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True
             )
         except Exception:
             # Fallback to CPU if GPU fails
             model_cache[model_id] = pipeline(
                 "text-generation",
                 model=model_id,
                 torch_dtype=torch.float32,
                 device_map="cpu",
                 trust_remote_code=True
             )
-    return model_cache[model_id]
 @spaces.GPU
 def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
     """Generate response using the selected model"""
     try:
-        model_pipe = initialize_model(model_name)
     except Exception as e:
         return f"Error loading model {model_name}: {str(e)}"
@@ -62,48 +112,65 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     messages.append({"role": "user", "content": message})
     try:
         try:
             response = model_pipe(
-                messages,
-                max_length=max_length,
                 temperature=temperature,
                 top_p=top_p,
                 do_sample=True,
-                pad_token_id=model_pipe.tokenizer.eos_token_id,
                 return_full_text=False
             )
-        except:
-            conversation_text = ""
             for msg in messages:
                 if msg["role"] == "user":
-                    conversation_text += f"User: {msg['content']}\n"
                 else:
-                    conversation_text += f"Assistant: {msg['content']}\n"
-            conversation_text += "Assistant:"
             response = model_pipe(
                 conversation_text,
-                max_length=max_length,
                 temperature=temperature,
                 top_p=top_p,
                 do_sample=True,
-                pad_token_id=model_pipe.tokenizer.eos_token_id,
                 return_full_text=False
             )
-        if isinstance(response, list) and len(response) > 0:
-            generated_text = response[0]['generated_text']
-        else:
-            generated_text = str(response)
-        if isinstance(generated_text, list):
-            assistant_response = generated_text[-1]['content']
-        else:
             assistant_response = str(generated_text).strip()
-            if "Assistant:" in assistant_response:
-                assistant_response = assistant_response.split("Assistant:")[-1].strip()
-        return assistant_response
     except Exception as e:
         return f"Error generating response: {str(e)}"
@@ -141,8 +208,8 @@ def create_interface():
                 maximum=8192,
                 value=2048,
                 step=50,
-                label="Max Length",
-                info="Maximum length of generated response"
             )
             temperature = gr.Slider(
                 minimum=0.1,
@@ -207,4 +274,4 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch(share=True)

 import gradio as gr
 import spaces
+from transformers import pipeline, AutoTokenizer
 import torch
 from typing import List, Dict, Optional
 # Global variable to store pipelines
 model_cache = {}
+tokenizer_cache = {}
 # Available models (only Daedalus)
 AVAILABLE_MODELS = {
 @spaces.GPU
 def initialize_model(model_name):
+    global model_cache, tokenizer_cache
     if model_name not in AVAILABLE_MODELS:
         raise ValueError(f"Model {model_name} not found in available models")
     # Check if model is already cached
     if model_id not in model_cache:
         try:
+            # Load tokenizer separately to handle chat template properly
+            tokenizer_cache[model_id] = AutoTokenizer.from_pretrained(
+                model_id,
+                trust_remote_code=True
+            )
             model_cache[model_id] = pipeline(
                 "text-generation",
                 model=model_id,
+                tokenizer=tokenizer_cache[model_id],
                 torch_dtype=torch.float16,
                 device_map="auto",
                 trust_remote_code=True
             )
         except Exception:
             # Fallback to CPU if GPU fails
+            tokenizer_cache[model_id] = AutoTokenizer.from_pretrained(
+                model_id,
+                trust_remote_code=True
+            )
             model_cache[model_id] = pipeline(
                 "text-generation",
                 model=model_id,
+                tokenizer=tokenizer_cache[model_id],
                 torch_dtype=torch.float32,
                 device_map="cpu",
                 trust_remote_code=True
             )
+    return model_cache[model_id], tokenizer_cache[model_id]
def format_conversation_with_template(messages: List[Dict], tokenizer) -> str:
    """Render a chat history into a single prompt string for generation.

    The tokenizer's own chat template is preferred. When the tokenizer has
    no template, or applying it raises, the prompt is assembled by hand:
    each turn is written as BOS token, role, newline, content, EOS token,
    and the prompt ends with an open assistant turn.

    Args:
        messages: Chat turns as dicts with ``role`` and ``content`` keys.
            A missing or empty role is treated as ``"user"``; missing or
            ``None`` content is treated as an empty string.
        tokenizer: Object that may expose ``chat_template``,
            ``apply_chat_template``, ``bos_token`` and ``eos_token``.
            Absent or empty special tokens fall back to ``<s>`` / ``</s>``.

    Returns:
        The formatted prompt, ending with the assistant generation prompt.
    """
    default_system = (
        "You are an AI Coding model called Daedalus, "
        "developed by Noema Research"
    )

    if getattr(tokenizer, "chat_template", None):
        try:
            return tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        except Exception as e:
            # Deliberate best effort: a broken template must not block
            # generation, so report it and use the manual format below.
            print(f"Chat template application failed: {e}")

    bos_token = getattr(tokenizer, "bos_token", None) or "<s>"
    eos_token = getattr(tokenizer, "eos_token", None) or "</s>"

    turns = []
    # Only inject the default system prompt when the caller did not supply
    # one; otherwise the prompt would carry two competing system turns.
    if not any(msg.get("role") == "system" for msg in messages):
        turns.append(("system", default_system))
    for msg in messages:
        role = msg.get("role") or "user"
        # ``content`` may be present but None; treat that as empty text
        # instead of failing on ``None.strip()``.
        content = str(msg.get("content") or "").strip()
        turns.append((role, content))

    parts = [
        f"{bos_token}{role}\n{content}{eos_token}" for role, content in turns
    ]
    # Open assistant turn so the model continues as the assistant.
    parts.append(f"{bos_token}assistant\n")
    return "".join(parts)
 @spaces.GPU
 def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
     """Generate response using the selected model"""
     try:
+        model_pipe, tokenizer = initialize_model(model_name)
     except Exception as e:
         return f"Error loading model {model_name}: {str(e)}"
     messages.append({"role": "user", "content": message})
     try:
+        # Method 1: Try using the pipeline with proper chat template
         try:
+            # Format the conversation using the chat template
+            formatted_prompt = format_conversation_with_template(messages, tokenizer)
             response = model_pipe(
+                formatted_prompt,
+                max_new_tokens=max_length,
                 temperature=temperature,
                 top_p=top_p,
                 do_sample=True,
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
                 return_full_text=False
             )
+            if isinstance(response, list) and len(response) > 0:
+                generated_text = response[0]['generated_text']
+            else:
+                generated_text = str(response)
+            # Clean up the response
+            assistant_response = str(generated_text).strip()
+            # Remove any residual formatting artifacts
+            if assistant_response.startswith("assistant\n"):
+                assistant_response = assistant_response[10:].strip()
+            return assistant_response
+        except Exception as template_error:
+            print(f"Chat template method failed: {template_error}")
+            # Method 2: Fallback to simple string formatting
+            conversation_text = "system\nYou are an AI Coding model called Daedalus, developed by Noema Research\n\n"
             for msg in messages:
                 if msg["role"] == "user":
+                    conversation_text += f"user\n{msg['content']}\n\n"
                 else:
+                    conversation_text += f"assistant\n{msg['content']}\n\n"
+            conversation_text += "assistant\n"
             response = model_pipe(
                 conversation_text,
+                max_new_tokens=max_length,
                 temperature=temperature,
                 top_p=top_p,
                 do_sample=True,
+                pad_token_id=tokenizer.eos_token_id,
                 return_full_text=False
             )
+            if isinstance(response, list) and len(response) > 0:
+                generated_text = response[0]['generated_text']
+            else:
+                generated_text = str(response)
             assistant_response = str(generated_text).strip()
+            return assistant_response
     except Exception as e:
         return f"Error generating response: {str(e)}"
                 maximum=8192,
                 value=2048,
                 step=50,
+                label="Max New Tokens",
+                info="Maximum number of new tokens to generate"
             )
             temperature = gr.Slider(
                 minimum=0.1,
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
+    demo.launch(share=True)