Spestly committed
Commit 2190cc9 · verified · 1 Parent(s): 2c56220

Update app.py

Files changed (1)
  1. app.py +62 -64
app.py CHANGED
@@ -75,9 +75,9 @@ def format_conversation_with_template(messages: List[Dict], tokenizer) -> str:
         # Fall back to manual formatting
         pass
 
-    # Manual fallback formatting based on your template
-    bos_token = tokenizer.bos_token if tokenizer.bos_token else "<s>"
-    eos_token = tokenizer.eos_token if tokenizer.eos_token else "</s>"
+    # Manual fallback formatting using actual special tokens
+    bos_token = "<[begin▁of▁sentence]>"
+    eos_token = "<[end▁of▁sentence]>"
 
     # Start with system message
     formatted = f"{bos_token}system\nYou are an AI Coding model called Daedalus, developed by Noema Research{eos_token}"
@@ -112,65 +112,63 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     messages.append({"role": "user", "content": message})
 
     try:
-        # Method 1: Try using the pipeline with proper chat template
-        try:
-            # Format the conversation using the chat template
-            formatted_prompt = format_conversation_with_template(messages, tokenizer)
-
-            response = model_pipe(
-                formatted_prompt,
-                max_new_tokens=max_length,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                return_full_text=False
-            )
-
-            if isinstance(response, list) and len(response) > 0:
-                generated_text = response[0]['generated_text']
-            else:
-                generated_text = str(response)
-
-            # Clean up the response
-            assistant_response = str(generated_text).strip()
-
-            # Remove any residual formatting artifacts
-            if assistant_response.startswith("assistant\n"):
-                assistant_response = assistant_response[10:].strip()
-
-            return assistant_response
-
-        except Exception as template_error:
-            print(f"Chat template method failed: {template_error}")
-
-        # Method 2: Fallback to simple string formatting
-        conversation_text = "system\nYou are an AI Coding model called Daedalus, developed by Noema Research\n\n"
-        for msg in messages:
-            if msg["role"] == "user":
-                conversation_text += f"user\n{msg['content']}\n\n"
-            else:
-                conversation_text += f"assistant\n{msg['content']}\n\n"
-        conversation_text += "assistant\n"
-
-        response = model_pipe(
-            conversation_text,
-            max_new_tokens=max_length,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,
-            return_full_text=False
-        )
-
-        if isinstance(response, list) and len(response) > 0:
-            generated_text = response[0]['generated_text']
-        else:
-            generated_text = str(response)
-
-        assistant_response = str(generated_text).strip()
-        return assistant_response
+        # Format the conversation using the chat template
+        formatted_prompt = format_conversation_with_template(messages, tokenizer)
+
+        # CRITICAL: Proper stop tokens to prevent repetition
+        stop_tokens = [
+            "<[end▁of▁sentence]>",    # EOS token
+            "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
+            "user\n",                 # Stop if model tries to continue conversation
+            "system\n",               # Stop if model tries to add system messages
+            "\nuser",                 # Alternative format
+            "\nsystem"                # Alternative format
+        ]
+
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=1,  # PAD token ID from your config
+            eos_token_id=2,  # EOS token ID from your config
+            bos_token_id=0,  # BOS token ID from your config
+            return_full_text=False,
+            # Add repetition penalty to reduce loops
+            repetition_penalty=1.1,
+            # Stop on these strings
+            stop_sequence=stop_tokens[0]  # Primary stop token
+        )
+
+        if isinstance(response, list) and len(response) > 0:
+            generated_text = response[0]['generated_text']
+        else:
+            generated_text = str(response)
+
+        # Clean up the response - remove stop tokens and formatting
+        assistant_response = str(generated_text).strip()
+
+        # Remove stop tokens if they appear in output
+        for stop_token in stop_tokens:
+            if stop_token in assistant_response:
+                assistant_response = assistant_response.split(stop_token)[0].strip()
+
+        # Remove any residual role formatting
+        if assistant_response.startswith("assistant\n"):
+            assistant_response = assistant_response[10:].strip()
+
+        # Additional cleanup for common repetition patterns
+        lines = assistant_response.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            # Skip empty lines or lines that look like role markers
+            if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
+                cleaned_lines.append(line)
+
+        assistant_response = '\n'.join(cleaned_lines).strip()
+
+        return assistant_response if assistant_response else "I apologize, but I couldn't generate a proper response. Please try again."
 
     except Exception as e:
         return f"Error generating response: {str(e)}"
@@ -205,8 +203,8 @@ def create_interface():
         with gr.Accordion("Advanced Settings", open=False):
             max_length = gr.Slider(
                 minimum=200,
-                maximum=8192,
-                value=2048,
+                maximum=4096,  # Reduced from 8192 to prevent memory issues
+                value=1024,    # Reduced default from 2048
                 step=50,
                 label="Max New Tokens",
                 info="Maximum number of new tokens to generate"