Spaces: Sleeping
Update app.py
Browse files

app.py (CHANGED)
@@ -9,41 +9,43 @@ from typing import Optional
 import asyncio
 import time
 import gc
+import random  # Added for the fallback responses

 # Initialize FastAPI
+app = FastAPI(title="LyonPoy AI Chat - CPU Optimized (Prompt Mode)")

 # Set seed for consistency
 set_seed(42)

 # CPU-Optimized 11 models configuration
+# Adjust max_tokens to leave more room for generation after the prompt
 MODELS = {
     "distil-gpt-2": {
         "name": "DistilGPT-2 β‘",
         "model_path": "Lyon28/Distil_GPT-2",
         "task": "text-generation",
+        "max_tokens": 60,  # Increased
+        "priority": 1
     },
     "gpt-2-tinny": {
         "name": "GPT-2 Tinny β‘",
         "model_path": "Lyon28/GPT-2-Tinny",
         "task": "text-generation",
+        "max_tokens": 50,  # Increased
         "priority": 1
     },
     "bert-tinny": {
         "name": "BERT Tinny π",
         "model_path": "Lyon28/Bert-Tinny",
         "task": "text-classification",
+        "max_tokens": 0,  # Not relevant for classification
         "priority": 1
     },
     "distilbert-base-uncased": {
         "name": "DistilBERT π",
         "model_path": "Lyon28/Distilbert-Base-Uncased",
         "task": "text-classification",
+        "max_tokens": 0,  # Not relevant for classification
         "priority": 1
     },
     "albert-base-v2": {

@@ -64,51 +66,57 @@ MODELS = {
         "name": "T5 Small π",
         "model_path": "Lyon28/T5-Small",
         "task": "text2text-generation",
+        "max_tokens": 70,  # Increased
         "priority": 2
     },
     "gpt-2": {
         "name": "GPT-2 Standard",
         "model_path": "Lyon28/GPT-2",
         "task": "text-generation",
+        "max_tokens": 70,  # Increased
         "priority": 2
     },
     "tinny-llama": {
         "name": "Tinny Llama",
         "model_path": "Lyon28/Tinny-Llama",
         "task": "text-generation",
+        "max_tokens": 80,  # Increased
         "priority": 3
     },
     "pythia": {
         "name": "Pythia",
         "model_path": "Lyon28/Pythia",
         "task": "text-generation",
+        "max_tokens": 80,  # Increased
         "priority": 3
     },
     "gpt-neo": {
         "name": "GPT-Neo",
         "model_path": "Lyon28/GPT-Neo",
         "task": "text-generation",
+        "max_tokens": 90,  # Increased
         "priority": 3
     }
 }

 class ChatRequest(BaseModel):
+    message: str  # Will contain the full structured prompt
     model: Optional[str] = "distil-gpt-2"
+    # Extra fields for a structured prompt in case they are needed in Pydantic,
+    # but for now we parse everything from 'message'
+    situasi: Optional[str] = ""
+    latar: Optional[str] = ""
+    user_message: str  # The actual user message
+

 # CPU-Optimized startup
 @app.on_event("startup")
+async def load_models_on_startup():  # Renamed so the function name is unique
     app.state.pipelines = {}
+    app.state.tokenizers = {}  # Not used explicitly yet, but useful to have if needed

     # Set CPU optimizations
+    torch.set_num_threads(2)
     os.environ['OMP_NUM_THREADS'] = '2'
     os.environ['MKL_NUM_THREADS'] = '2'
     os.environ['NUMEXPR_NUM_THREADS'] = '2'

@@ -118,116 +126,143 @@ async def load_models():
     os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
     os.makedirs(os.environ['HF_HOME'], exist_ok=True)

+    print("π LyonPoy AI Chat - CPU Optimized (Prompt Mode) Ready!")

 # Lightweight frontend
 @app.get("/", response_class=HTMLResponse)
 async def get_frontend():
+    # Styling takes its cues from styles.css and the layout from chat.html
+    # This is a VERY simplified, embedded version
     html_content = '''
 <!DOCTYPE html>
 <html lang="id">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>LyonPoy AI Chat - Prompt Mode</title>
     <style>
+        :root {
+            --primary-color: #075E54; /* styles.css */
+            --bg-primary: #ffffff; /* styles.css */
+            --bg-secondary: #f8f9fa; /* styles.css */
+            --bg-accent: #DCF8C6; /* styles.css */
+            --text-primary: #212529; /* styles.css */
+            --text-white: #ffffff; /* styles.css */
+            --border-color: #dee2e6; /* styles.css */
+            --border-radius: 10px; /* styles.css */
+            --spacing-sm: 0.5rem;
+            --spacing-md: 1rem;
+            --shadow: 0 2px 5px rgba(0, 0, 0, 0.15); /* styles.css */
+            --font-size-base: 1rem;
+            --font-size-sm: 0.875rem;
+            --font-size-xs: 0.75rem;
+        }
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            background-color: var(--bg-secondary); /* styles.css --bg-primary */
+            color: var(--text-primary); /* styles.css */
            height: 100vh; display: flex; justify-content: center; align-items: center;
+            padding: var(--spacing-md);
        }
+        .app-container { /* Similar to #app in styles.css */
+            width: 100%;
+            max-width: 600px; /* Wider to fit the extra inputs */
+            height: 95vh;
+            background: var(--bg-primary); /* styles.css */
+            border-radius: var(--border-radius); /* styles.css */
+            box-shadow: var(--shadow); /* styles.css */
+            display: flex; flex-direction: column; overflow: hidden;
        }
+        .chat-header { /* Similar to .header.chat-header in styles.css */
+            background: var(--primary-color); /* styles.css --secondary-color (for the chat header) */
+            color: var(--text-white); /* styles.css */
+            padding: var(--spacing-md);
+            text-align: center;
        }
+        .chat-header h1 { font-size: 1.2rem; font-weight: 600; margin-bottom: var(--spacing-sm); }
        .model-selector {
            background: rgba(255,255,255,0.2); border: none; color: white;
+            padding: 6px 10px; border-radius: 15px; font-size: 0.8rem; cursor: pointer;
+            width: 100%;
        }
+        .chat-messages { /* Similar to .chat-messages in styles.css */
+            flex: 1; padding: var(--spacing-md); overflow-y: auto; background: var(--bg-secondary); /* styles.css */
            display: flex; flex-direction: column; gap: 12px;
        }
+        .message-group { /* Similar to .message-group in styles.css */
+            display: flex;
+            max-width: 75%; /* styles.css --message-max-width */
        }
+        .message-group.outgoing { align-self: flex-end; flex-direction: row-reverse; }
+        .message-group.incoming { align-self: flex-start; }
+
+        .message { /* Similar to .message in styles.css */
+            padding: var(--spacing-sm) var(--spacing-md);
+            border-radius: var(--border-radius); /* styles.css --message-border-radius */
+            font-size: var(--font-size-sm); /* styles.css --font-size-base (for messages) */
+            line-height: 1.4; word-wrap: break-word;
+            position: relative;
        }
+        .message-group.outgoing .message {
+            background: var(--bg-accent); /* styles.css */
+            color: var(--text-primary);
+            margin-left: var(--spacing-md);
        }
+        .message-group.incoming .message {
+            background: var(--bg-primary); /* styles.css */
+            color: var(--text-primary);
+            box-shadow: var(--shadow-sm); /* styles.css --shadow-sm */
+            margin-right: var(--spacing-md); /* If there is an avatar */
        }
+        .message-info { /* Similar to .message-info in styles.css */
+            display: flex; justify-content: flex-end; align-items: center;
+            margin-top: var(--spacing-xs);
+            font-size: var(--font-size-xs); /* styles.css */
+            color: #6c757d; /* styles.css --text-muted */
        }
+        .message-time { margin-right: var(--spacing-xs); }
+        .response-time-info { font-size: 9px; color: #666; margin-top: 2px; }
+
+        .input-area { /* Container for all the inputs */
+            padding: var(--spacing-md);
+            background: var(--bg-primary); /* styles.css */
+            border-top: 1px solid var(--border-color); /* styles.css */
        }
+        .prompt-inputs { display: flex; gap: var(--spacing-sm); margin-bottom: var(--spacing-sm); }
+        .prompt-inputs input { flex: 1; }
+
+        .chat-input-container { /* Similar to .chat-input-container in styles.css */
+            display: flex; gap: var(--spacing-sm); align-items: center;
        }
+        .chat-input { /* Similar to the textarea in .chat-input-field in styles.css */
+            flex: 1; padding: var(--spacing-sm) var(--spacing-md);
+            border: 1px solid var(--border-color); /* styles.css */
+            border-radius: 20px; /* styles.css --border-radius-xl */
+            font-size: var(--font-size-sm); outline: none;
        }
+        .chat-input:focus { border-color: var(--primary-color); }
+        .send-button { /* Similar to .send-btn in styles.css */
+            background: var(--primary-color); color: var(--text-white); border: none;
+            border-radius: 50%; width: 40px; height: 40px; cursor: pointer;
+            display: flex; align-items: center; justify-content: center; font-size: 1.2rem;
        }
+        .send-button:hover { filter: brightness(1.2); }
+        .send-button:disabled { background: #d1d5db; cursor: not-allowed; }
+
+        .typing-indicator-text {
+            font-style: italic; color: #6c757d; font-size: var(--font-size-sm);
+            padding: var(--spacing-sm) var(--spacing-md);
+            text-align: center;
        }
+        .model-status { font-size: 10px; color: rgba(255,255,255,0.8); margin-top: 3px; text-align: center; }
+        label { font-size: 0.9em; margin-bottom: 0.2em; display:block; }
    </style>
 </head>
 <body>
+    <div class="app-container">
        <div class="chat-header">
+            <h1>AI Character Prompt Mode</h1>
            <select class="model-selector" id="modelSelect">
                <option value="distil-gpt-2">π DistilGPT-2 (Fastest)</option>
                <option value="gpt-2-tinny">π GPT-2 Tinny (Fast)</option>

@@ -244,145 +279,143 @@ async def get_frontend():
            <div class="model-status" id="modelStatus">Ready to chat!</div>
        </div>
        <div class="chat-messages" id="chatMessages">
+            <div class="message-group incoming">
+                <div class="message">
+                    Hello! Atur Situasi, Latar, dan pesanmu di bawah. Lalu kirim!
+                    <div class="message-info"><span class="message-time">${new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' })}</span></div>
+                </div>
            </div>
        </div>
+        <div class="typing-indicator-text" id="typingIndicator" style="display: none;">AI sedang berpikir...</div>
+        <div class="input-area">
+            <div class="prompt-inputs">
+                <div>
+                    <label for="situasiInput">Situasi:</label>
+                    <input type="text" class="chat-input" id="situasiInput" placeholder="Mis: Santai">
+                </div>
+                <div>
+                    <label for="latarInput">Latar:</label>
+                    <input type="text" class="chat-input" id="latarInput" placeholder="Mis: Tepi sungai">
+                </div>
+            </div>
+            <div class="chat-input-container">
+                <input type="text" class="chat-input" id="userMessageInput" placeholder="Ketik pesan sebagai {{User}}..." maxlength="150">
+                <button class="send-button" id="sendButton">β€</button>
+            </div>
        </div>
    </div>
    <script>
        const chatMessages = document.getElementById('chatMessages');
+        const situasiInput = document.getElementById('situasiInput');
+        const latarInput = document.getElementById('latarInput');
+        const userMessageInput = document.getElementById('userMessageInput');
        const sendButton = document.getElementById('sendButton');
        const modelSelect = document.getElementById('modelSelect');
        const typingIndicator = document.getElementById('typingIndicator');
        const modelStatus = document.getElementById('modelStatus');

        const API_BASE = window.location.origin;

+        function scrollToBottom() { chatMessages.scrollTop = chatMessages.scrollHeight; }

+        function addMessage(content, isUser = false, responseTimeMs = null, fullPromptForUser = null) {
+            const messageGroupDiv = document.createElement('div');
+            messageGroupDiv.className = \`message-group \${isUser ? 'outgoing' : 'incoming'}\`;
+
            const messageDiv = document.createElement('div');
+            messageDiv.className = 'message';

+            const time = new Date().toLocaleTimeString('id-ID', { hour: '2-digit', minute: '2-digit' });
+            let timeInfoHtml = \`<div class="message-info"><span class="message-time">\${time}</span></div>\`;
+
+            if (responseTimeMs !== null && !isUser) {
+                timeInfoHtml += \`<div class="response-time-info">\${responseTimeMs}ms</div>\`;
            }

+            // For user messages we could show the full prompt or only the user message.
+            // For now only the user message is shown, for cleanliness, but the full prompt is sent to the backend.
+            const displayContent = isUser ? userMessageInput.value.trim() : content;
+            messageDiv.innerHTML = displayContent.replace(/\\n/g, '<br>') + timeInfoHtml;

+            messageGroupDiv.appendChild(messageDiv);
+            chatMessages.appendChild(messageGroupDiv);
+            scrollToBottom();
        }

        async function sendMessage() {
+            const situasi = situasiInput.value.trim();
+            const latar = latarInput.value.trim();
+            const userMsg = userMessageInput.value.trim();
+
+            if (!userMsg) {
+                alert("Pesan pengguna tidak boleh kosong!");
+                return;
+            }

+            const fullPrompt = \`Situasi: \${situasi}\\nLatar: \${latar}\\n{{User}}: \${userMsg}\\n{{Char}}:\`;
+
+            addMessage(userMsg, true, null, fullPrompt);
+
+            userMessageInput.value = ''; // Clear only the user message input
+            userMessageInput.disabled = true;
            sendButton.disabled = true;
+            typingIndicator.style.display = 'block';
            modelStatus.textContent = 'Processing...';

            const startTime = Date.now();

            try {
+                const response = await fetch(API_BASE + '/chat', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
+                        message: fullPrompt, // Send the full prompt
+                        model: modelSelect.value,
+                        // Extra information in case the backend wants to process it separately
+                        situasi: situasi,
+                        latar: latar,
+                        user_message: userMsg
                    })
                });

                const data = await response.json();
                const responseTime = Date.now() - startTime;

                if (data.status === 'success') {
                    addMessage(data.response, false, responseTime);
                } else {
+                    addMessage(data.response || 'β οΈ Model gagal merespon, coba lagi.', false, responseTime);
                }
            } catch (error) {
                const responseTime = Date.now() - startTime;
+                addMessage('β Koneksi bermasalah atau error server.', false, responseTime);
                console.error('Error:', error);
            }

+            typingIndicator.style.display = 'none';
+            modelStatus.textContent = 'Ready';
+            userMessageInput.disabled = false;
            sendButton.disabled = false;
+            userMessageInput.focus();
        }

        sendButton.addEventListener('click', sendMessage);
+        userMessageInput.addEventListener('keypress', (e) => {
+            if (e.key === 'Enter' && !e.shiftKey) {
+                e.preventDefault(); // Prevent a newline in the input
+                sendMessage();
+            }
        });

        modelSelect.addEventListener('change', () => {
            const selectedOption = modelSelect.options[modelSelect.selectedIndex];
+            modelStatus.textContent = \`Model: \${selectedOption.text}\`;
        });

        window.addEventListener('load', () => {
+            userMessageInput.focus();
+            const initialModelName = modelSelect.options[modelSelect.selectedIndex].text;
+            modelStatus.textContent = \`\${initialModelName} Ready\`;
        });
    </script>
 </body>

@@ -398,192 +431,264 @@ async def chat(request: ChatRequest):
     try:
         model_id = request.model.lower()
         if model_id not in MODELS:
+            model_id = "distil-gpt-2"

         model_config = MODELS[model_id]

+        # The message in the request is now an already structured prompt, e.g.:
+        # "Situasi: Santai\nLatar:Tepi sungai\n{{User}}:sayang,danau nya indah ya, (memeluk {{char}} dari samping)\n{{Char}}:"
+        structured_prompt = request.message
+
         if model_id not in app.state.pipelines:
             print(f"β‘ CPU Loading {model_config['name']}...")

             pipeline_kwargs = {
                 "task": model_config["task"],
                 "model": model_config["model_path"],
+                "device": -1,
+                "torch_dtype": torch.float32,
                 "model_kwargs": {
                     "torchscript": False,
                     "low_cpu_mem_usage": True
                 }
             }
+            if model_config["task"] != "text-classification":  # Tokenizer only for the generator models
+                app.state.tokenizers[model_id] = AutoTokenizer.from_pretrained(model_config["model_path"])
+
             app.state.pipelines[model_id] = pipeline(**pipeline_kwargs)
             gc.collect()

         pipe = app.state.pipelines[model_id]

+        generated_text = "Output tidak didukung untuk task ini."
+
         if model_config["task"] == "text-generation":
+            # Count the prompt length in tokens
+            current_tokenizer = app.state.tokenizers.get(model_id)
+            if not current_tokenizer:  # Fallback if the tokenizer is not in state (it should be)
+                current_tokenizer = AutoTokenizer.from_pretrained(model_config["model_path"])
+
+            prompt_tokens = current_tokenizer.encode(structured_prompt, return_tensors="pt")
+            prompt_length_tokens = prompt_tokens.shape[1]
+
+            # max_length is the total (prompt + generated); max_tokens is only the generated part.
+            # Make sure max_length does not exceed the model capacity (usually 512 or 1024 for small models)
+            # and is not too short either.
+            # Some models may have a smaller max_position_embeddings.
+            # Cap max_length to something safe like 256 or 512 if it gets too large.
+            # model_config["max_tokens"] is the maximum number of *new* tokens we want.
+
+            # Use max_new_tokens directly if the pipeline supports it, otherwise set max_length.
+            # For the generic pipeline, max_length is the main knob.
+            # max_length must be larger than the prompt.
+            # The max new tokens comes from the model config.
+            max_new_generated_tokens = model_config["max_tokens"]
+            max_len_for_generation = prompt_length_tokens + max_new_generated_tokens
+
+            # Cap the total max_length so it is not too large for small models.
+            # GPT-2 and DistilGPT-2 have a 1024-token context, for example.
+            # Smaller models may have a lower limit.
+            # Use a safe upper bound, e.g. 512 for this demo.
+            # Adjust this if your specific model has a different limit.
+            absolute_max_len = 512
+            if hasattr(pipe.model.config, 'max_position_embeddings'):
+                absolute_max_len = pipe.model.config.max_position_embeddings
+
+            max_len_for_generation = min(max_len_for_generation, absolute_max_len)
+
+            # Make sure max_length is at least the prompt plus a few new tokens
+            if max_len_for_generation <= prompt_length_tokens + 5:  # at least 5 new tokens
+                max_len_for_generation = prompt_length_tokens + 5
+
+            # Make sure we do not request more new tokens than absolute_max_len allows
+            actual_max_new_tokens = max_len_for_generation - prompt_length_tokens
+            if actual_max_new_tokens <= 0:  # The prompt is already too long
+                return {
+                    "response": "Hmm, prompt terlalu panjang untuk model ini. Coba perpendek situasi/latar/pesan.",
+                    "model": model_config["name"],
+                    "status": "error_prompt_too_long",
+                    "processing_time": f"{round((time.time() - start_time) * 1000)}ms"
+                }
+
+            outputs = pipe(
+                structured_prompt,
+                max_length=max_len_for_generation,  # Total length
+                # max_new_tokens=actual_max_new_tokens,  # Preferred when the pipeline supports it explicitly
+                temperature=0.75,  # A bit more creative
                do_sample=True,
+                top_p=0.9,  # Slightly broader sampling
+                pad_token_id=pipe.tokenizer.eos_token_id if hasattr(pipe.tokenizer, 'eos_token_id') else 50256,  # 50256 for GPT-2
                num_return_sequences=1,
+                early_stopping=True,
+                truncation=True  # Important when the prompt is too long for the model
+            )
+            generated_text = outputs[0]['generated_text']

+            # Cleanup: extract only the text after the "{{Char}}:" marker
+            char_marker = "{{Char}}:"
+            if char_marker in generated_text:
+                generated_text = generated_text.split(char_marker, 1)[-1].strip()
+            elif generated_text.startswith(structured_prompt):  # fallback if the marker is missing
+                generated_text = generated_text[len(structured_prompt):].strip()
+
+            # Drop a repeated copy of the user's message, if any
+            if request.user_message and generated_text.startswith(request.user_message):
+                generated_text = generated_text[len(request.user_message):].strip()
+
+            # Limit to a few sentences or a fixed length for speed and relevance
+            # This could be made more flexible
+            sentences = generated_text.split('.')
+            if len(sentences) > 2:  # Keep the first two sentences if present
+                generated_text = sentences[0].strip() + ('.' if sentences[0] else '') + \
+                                 (sentences[1].strip() + '.' if len(sentences) > 1 and sentences[1] else '')
+            elif len(generated_text) > 150:  # Rough character limit
+                generated_text = generated_text[:147] + '...'

        elif model_config["task"] == "text-classification":
+            # For classification, use the actual user message instead of the structured prompt
+            user_msg_for_classification = request.user_message if request.user_message else structured_prompt
+            output = pipe(user_msg_for_classification[:256], truncation=True, max_length=256)[0]  # Limit the input
            confidence = f"{output['score']:.2f}"
+            generated_text = f"π Klasifikasi pesan '{user_msg_for_classification[:30]}...': {output['label']} (Skor: {confidence})"

        elif model_config["task"] == "text2text-generation":
+            # T5-style models may need a slightly different input format,
+            # but for this demo we send the prompt as-is.
+            # You may need to add a task prefix such as "translate English to German: " for T5.
+            # For chat we can leave it as-is or use user_message.
+            user_msg_for_t2t = request.user_message if request.user_message else structured_prompt
+            outputs = pipe(
+                user_msg_for_t2t[:256],  # Limit the input for T5
+                max_length=model_config["max_tokens"],  # This is the max_length of the T5 output
+                temperature=0.65,
+                early_stopping=True,
+                truncation=True
+            )
+            generated_text = outputs[0]['generated_text']

+        if not generated_text or len(generated_text.strip()) < 1:
+            generated_text = "π€ Hmm, saya tidak yakin bagaimana merespon. Coba lagi dengan prompt berbeda?"
+        elif len(generated_text) > 250:  # Final output limit
+            generated_text = generated_text[:247] + "..."

+        processing_time_ms = round((time.time() - start_time) * 1000)

        return {
+            "response": generated_text,
            "model": model_config["name"],
            "status": "success",
+            "processing_time": f"{processing_time_ms}ms"
        }

    except Exception as e:
        print(f"β CPU Error: {e}")
+        import traceback
+        traceback.print_exc()  # Print the full traceback for debugging
+        processing_time_ms = round((time.time() - start_time) * 1000)

        fallback_responses = [
+            "π Maaf, ada sedikit gangguan. Coba lagi dengan kata yang lebih simpel?",
+            "π Hmm, sepertinya saya butuh istirahat sejenak. Mungkin pertanyaan lain?",
+            "β‘ Model sedang dioptimalkan, tunggu sebentar dan coba lagi...",
+            "π Mungkin coba model lain yang lebih cepat atau prompt yang berbeda?"
        ]

        fallback = random.choice(fallback_responses)

        return {
+            "response": f"{fallback} (Error: {str(e)[:100]})",  # Include a little error info
            "status": "error",
+            "model": MODELS.get(model_id, {"name": "Unknown"})["name"] if 'model_id' in locals() else "Unknown",
+            "processing_time": f"{processing_time_ms}ms"
        }

+# Optimized inference endpoint (NOT updated in detail for the new prompt mode,
+# since the main focus is /chat and its frontend. If /inference also needs prompt mode,
+# it has to construct a similar ChatRequest.)
 @app.post("/inference")
 async def inference(request: dict):
+    """CPU-Optimized inference endpoint - MAY NEED ADJUSTMENT FOR PROMPT MODE"""
    try:
+        # For prompt mode, 'message' must be the full structured prompt,
+        # or this endpoint has to be changed to accept 'situasi', 'latar', 'user_message'
+        message = request.get("message", "")
+        model_id_from_request = request.get("model", "distil-gpt-2")  # Should be the internal model_id
+
+        # If a model path was given, try to map it to the internal model_id
+        if "/" in model_id_from_request:
+            model_key_from_path = model_id_from_request.split("/")[-1].lower()
+            model_mapping = {"distil_gpt-2": "distil-gpt-2", "gpt-2-tinny": "gpt-2-tinny"}  # ... (add all mappings) ...
+            internal_model = model_mapping.get(model_key_from_path, "distil-gpt-2")
+        else:  # Assume it is already an internal model_id
+            internal_model = model_id_from_request
+
+        # If /inference needs to support prompt mode, the data sent to ChatRequest must be adjusted.
+        # For this example we assume 'message' is only the user_message for /inference
+        # and situasi/latar are left at their defaults or unused.
+        # This is a simplification and may need to change to fit your needs.
+        chat_req_data = {
+            "message": f"{{User}}: {message}\n{{Char}}:",  # Simplest possible prompt form
+            "model": internal_model,
+            "user_message": message  # Keep the original user message
        }
+
+        chat_request_obj = ChatRequest(**chat_req_data)
+        result = await chat(chat_request_obj)

        return {
+            "result": result.get("response"),
+            "status": result.get("status"),
+            "model_used": result.get("model"),
            "processing_time": result.get("processing_time", "0ms")
        }

    except Exception as e:
        print(f"β Inference Error: {e}")
        return {
+            "result": "π Terjadi kesalahan pada endpoint inference. Coba lagi...",
            "status": "error"
        }

 # Lightweight health check
 @app.get("/health")
 async def health():
+    loaded_models_count = len(app.state.pipelines) if hasattr(app.state, 'pipelines') else 0
    return {
        "status": "healthy",
        "platform": "CPU",
+        "loaded_models": loaded_models_count,
        "total_models": len(MODELS),
+        "optimization": "CPU-Tuned (Prompt Mode)"
    }

 # Model info endpoint
 @app.get("/models")
+async def get_models_info():  # Renamed function
    return {
        "models": [
            {
+                "id": k, "name": v["name"], "task": v["task"],
+                "max_tokens_generate": v["max_tokens"], "priority": v["priority"],
                "cpu_optimized": True
            }
            for k, v in MODELS.items()
        ],
        "platform": "CPU",
+        "recommended_for_prompting": ["distil-gpt-2", "gpt-2-tinny", "tinny-llama", "gpt-neo", "pythia", "gpt-2"]
    }

 # Run with CPU optimizations
 if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
+    # Use reload=True in development so code changes are picked up immediately
+    # Turn reload off for production
+    # uvicorn.run("app:app", host="0.0.0.0", port=port, workers=1, reload=True)
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
+        workers=1,
+        timeout_keep_alive=30,  # The default keep-alive of 5 seconds may be too short while models load
+        access_log=False
    )
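For quick manual testing of the new prompt-mode contract, here is a minimal client-side sketch (not part of app.py). It assumes the Space is running locally on the default port 7860 and that the third-party requests package is available; the field names mirror the ChatRequest model above, and the structured prompt is built the same way the frontend builds it.

# Minimal sketch: exercise the prompt-mode /chat endpoint from a client script.
# Assumptions: app.py is running on http://localhost:7860 and "requests" is installed.
import requests

situasi = "Santai"
latar = "Tepi sungai"
user_msg = "sayang, danau nya indah ya"

# Same structured prompt the frontend sends: Situasi / Latar / {{User}} / {{Char}}.
structured_prompt = (
    "Situasi: " + situasi + "\n"
    "Latar: " + latar + "\n"
    "{{User}}: " + user_msg + "\n"
    "{{Char}}:"
)

payload = {
    "message": structured_prompt,   # full prompt, as chat() expects
    "model": "distil-gpt-2",
    "situasi": situasi,
    "latar": latar,
    "user_message": user_msg,       # required field on ChatRequest
}

resp = requests.post("http://localhost:7860/chat", json=payload, timeout=120)
data = resp.json()
print(data.get("status"), data.get("model"), data.get("processing_time"))
print(data.get("response"))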