Update app.py
app.py
CHANGED
@@ -75,9 +75,9 @@ def format_conversation_with_template(messages: List[Dict], tokenizer) -> str:
         # Fall back to manual formatting
         pass
 
-    # Manual fallback formatting
-    bos_token =
-    eos_token =
+    # Manual fallback formatting using actual special tokens
+    bos_token = "<[begin▁of▁sentence]>"
+    eos_token = "<[end▁of▁sentence]>"
 
     # Start with system message
     formatted = f"{bos_token}system\nYou are an AI Coding model called Daedalus, developed by Noema Research{eos_token}"
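A note on the hardcoded fallback tokens: they duplicate information the tokenizer already carries. A minimal alternative sketch, assuming `tokenizer` is a standard transformers tokenizer with its `bos_token`/`eos_token` attributes set (not part of this commit):

# Sketch: read the fallback tokens from the tokenizer itself so the manual
# path cannot drift out of sync with the model's actual special tokens.
bos_token = tokenizer.bos_token or ""
eos_token = tokenizer.eos_token or ""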
@@ -112,65 +112,63 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     messages.append({"role": "user", "content": message})
 
     try:
-        # ... (59 lines of the previous generation logic, not recoverable from this view) ...
-        assistant_response = str(generated_text).strip()
-        return assistant_response
+        # Format the conversation using the chat template
+        formatted_prompt = format_conversation_with_template(messages, tokenizer)
+
+        # CRITICAL: Proper stop tokens to prevent repetition
+        stop_tokens = [
+            "<[end▁of▁sentence]>",    # EOS token
+            "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
+            "user\n",                 # Stop if model tries to continue conversation
+            "system\n",               # Stop if model tries to add system messages
+            "\nuser",                 # Alternative format
+            "\nsystem"                # Alternative format
+        ]
+
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=1,   # PAD token ID from your config
+            eos_token_id=2,   # EOS token ID from your config
+            bos_token_id=0,   # BOS token ID from your config
+            return_full_text=False,
+            # Add repetition penalty to reduce loops
+            repetition_penalty=1.1,
+            # Stop on these strings
+            stop_sequence=stop_tokens[0]  # Primary stop token
+        )
+
+        if isinstance(response, list) and len(response) > 0:
+            generated_text = response[0]['generated_text']
+        else:
+            generated_text = str(response)
+
+        # Clean up the response - remove stop tokens and formatting
+        assistant_response = str(generated_text).strip()
+
+        # Remove stop tokens if they appear in output
+        for stop_token in stop_tokens:
+            if stop_token in assistant_response:
+                assistant_response = assistant_response.split(stop_token)[0].strip()
+
+        # Remove any residual role formatting
+        if assistant_response.startswith("assistant\n"):
+            assistant_response = assistant_response[10:].strip()
+
+        # Additional cleanup for common repetition patterns
+        lines = assistant_response.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            # Skip empty lines or lines that look like role markers
+            if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
+                cleaned_lines.append(line)
+
+        assistant_response = '\n'.join(cleaned_lines).strip()
+
+        return assistant_response if assistant_response else "I apologize, but I couldn't generate a proper response. Please try again."
 
     except Exception as e:
         return f"Error generating response: {str(e)}"
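Also worth noting: the pipeline's `stop_sequence` argument takes a single string, so only `stop_tokens[0]` is enforced during generation; the remaining entries are handled purely by the post-hoc string cleanup above. A sketch of enforcing all of them at generation time with a custom `StoppingCriteria`; the class `StopOnStrings` and the wiring are illustrative assumptions, and rely on the pipeline forwarding extra keyword arguments to `generate`:

from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnStrings(StoppingCriteria):
    """Stop as soon as any stop string appears in the newly generated text."""

    def __init__(self, stop_strings, tokenizer, prompt_length):
        self.stop_strings = stop_strings
        self.tokenizer = tokenizer
        self.prompt_length = prompt_length  # number of prompt tokens to skip

    def __call__(self, input_ids, scores, **kwargs):
        # Decode only the tokens generated after the prompt, then check
        # whether any stop string has been produced so far.
        new_text = self.tokenizer.decode(input_ids[0][self.prompt_length:])
        return any(s in new_text for s in self.stop_strings)

prompt_length = len(model_pipe.tokenizer(formatted_prompt)["input_ids"])
criteria = StoppingCriteriaList(
    [StopOnStrings(stop_tokens, model_pipe.tokenizer, prompt_length)]
)
response = model_pipe(
    formatted_prompt,
    max_new_tokens=max_length,
    return_full_text=False,
    stopping_criteria=criteria,
)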
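One caveat in the cleanup loop: `line.strip().startswith(('user', 'assistant', 'system'))` also discards legitimate output, such as a code line beginning with `user_id`. A stricter variant (a sketch, not part of the commit) drops a line only when it is exactly a bare role marker:

# Sketch: treat a line as a role marker only when the whole stripped line
# is "user", "assistant", or "system"; real content is kept.
ROLE_MARKERS = {"user", "assistant", "system"}
cleaned_lines = [
    line for line in lines
    if line.strip() and line.strip() not in ROLE_MARKERS
]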
@@ -205,8 +203,8 @@ def create_interface():
         with gr.Accordion("Advanced Settings", open=False):
             max_length = gr.Slider(
                 minimum=200,
-                maximum=8192,
-                value=2048,
+                maximum=4096,  # Reduced from 8192 to prevent memory issues
+                value=1024,    # Reduced default from 2048
                 step=50,
                 label="Max New Tokens",
                 info="Maximum number of new tokens to generate"