Dyraa18 committed on
Commit
20de58b
·
verified ·
1 Parent(s): 2853e42

Upload 7 files

Browse files
Files changed (7) hide show
  1. Guardrail.py +35 -0
  2. Model.py +13 -0
  3. app.py +676 -0
  4. app.sh +4 -0
  5. prepare_assets.py +40 -0
  6. requerments.txt +18 -0
  7. runtime.txt +1 -0
Guardrail.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Guardrail.py
2
+ import warnings
3
+ warnings.filterwarnings("ignore")
4
+ from functools import lru_cache
5
+
6
+ from transformers import logging as hf_logging
7
+ hf_logging.set_verbosity_error()
8
+ from transformers import pipeline
9
+
10
+ SAFE_LABELS = ["pertanyaan sejarah", "pertanyaan olahraga", "pertanyaan alam"]
11
+ UNSAFE_LABELS = ["kasar", "penghinaan", "berbahaya"]
12
+
13
@lru_cache(maxsize=1)
def _clf():
    """Build the zero-shot classification pipeline and memoize it.

    ``lru_cache(maxsize=1)`` keeps the single pipeline object, so the model is
    constructed only on the first call.
    """
    pipeline_options = {
        "model": "joeddav/xlm-roberta-large-xnli",
        "device": -1,  # -1 => run on the CPU
    }
    # NOTE(review): the weights are presumably served from the cache that
    # prepare_assets.py prefetches — confirm both processes share HF_HOME.
    return pipeline("zero-shot-classification", **pipeline_options)
19
+
20
def classify_text(text: str):
    """Classify ``text`` against every guardrail label (safe and unsafe).

    Returns:
        A tuple ``(first_label, first_score, scores)``: the first label and
        score reported by the classifier, and a dict mapping each candidate
        label to its score.
    """
    candidate_labels = [*SAFE_LABELS, *UNSAFE_LABELS]
    result = _clf()(text, candidate_labels=candidate_labels)
    ranked_labels = result["labels"]
    ranked_scores = result["scores"]
    per_label = {label: score for label, score in zip(ranked_labels, ranked_scores)}
    return ranked_labels[0], ranked_scores[0], per_label
26
+
27
def validate_input(text: str, threshold: float = 0.2) -> bool:
    """Decide whether ``text`` may be passed on to the RAG pipeline.

    Args:
        text: Raw user input; ``None`` is treated as empty.
        threshold: Score the top label must exceed (strictly).

    Returns:
        ``True`` only when the top label is one of ``SAFE_LABELS`` and its
        score is above ``threshold``; ``False`` for empty/blank input.
    """
    cleaned = (text or "").strip()
    if cleaned:
        label, score, _all_scores = classify_text(cleaned)
        return bool(label in SAFE_LABELS and score > threshold)
    # Blank input never reaches the classifier.
    return False
33
+
34
+ if __name__ == "__main__":
35
+ print(validate_input("kapan belanda menjajah indonesia?"))
Model.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model.py
2
+ import os
3
+ from llama_cpp import Llama
4
+
5
def load_model(path, n_ctx=2048, n_gpu_layers=0, n_threads=None):
    """Create a ``Llama`` instance for the model file at ``path``.

    When ``n_threads`` is ``None`` the thread count is read from the
    ``NUM_THREADS`` environment variable, defaulting to 4.
    """
    thread_count = n_threads
    if thread_count is None:
        thread_count = int(os.environ.get("NUM_THREADS", "4"))
    llama_options = dict(
        model_path=path,
        n_ctx=n_ctx,
        n_gpu_layers=n_gpu_layers,
        n_threads=thread_count,
    )
    return Llama(**llama_options)
9
+
10
def generate(llm, prompt, max_tokens=384, temperature=0.2, top_p=0.9, stop=None):
    """Run one completion on ``llm`` and return the stripped text.

    Args:
        llm: Callable invoked as ``llm(prompt, max_tokens=..., ...)`` that
            returns a mapping with a ``"choices"`` list.
        prompt: Prompt text passed through unchanged.
        stop: Stop sequences; a falsy value is replaced by an empty list.

    Returns:
        The ``"text"`` of the first choice with surrounding whitespace removed.
    """
    stop_sequences = stop if stop else []
    completion = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stop=stop_sequences,
    )
    first_choice = completion["choices"][0]
    return first_choice["text"].strip()
app.py ADDED
@@ -0,0 +1,676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # Flask RAG app (HF Spaces / Static) — dataset sudah ada di Space.
3
+ import os, json, re, time, logging
4
+ from functools import lru_cache
5
+ from typing import Dict, List, Tuple
6
+ from dataclasses import dataclass
7
+ from datetime import datetime
8
+ from zoneinfo import ZoneInfo
9
+ from pathlib import Path
10
+
11
+ from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
12
+ import numpy as np
13
+ import faiss
14
+ import torch
15
+ from transformers import AutoTokenizer, AutoModel
16
+ from dotenv import load_dotenv
17
+
18
+ load_dotenv()
19
+
20
+ # ========= ENV & LOGGING =========
21
+ os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
22
+ os.environ.setdefault("OMP_NUM_THREADS", "1")
23
+ torch.set_num_threads(1)
24
+ torch.set_num_interop_threads(1)
25
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
26
+ log = logging.getLogger("rag-app")
27
+
28
+ # ========= IMPORT EKSTERNAL =========
29
+ from Guardrail import validate_input # -> bool (lazy di file)
30
+ from Model import load_model, generate # -> llama.cpp wrapper
31
+
32
+ # ========= PATH ROOT PROYEK =========
33
+ BASE_DIR = Path(__file__).resolve().parent
34
+
35
+ # ========= KONFIGURASI RAG =========
36
+ MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", "DeepSeek-R1-0528-Qwen3-8B-Q4_K_M.gguf"))
37
+ CTX_WINDOW = 4096
38
+ N_GPU_LAYERS = 0 # HF Spaces CPU only
39
+ N_THREADS = int(os.environ.get("NUM_THREADS", "4"))
40
+
41
+ ENCODER_NAME = "intfloat/multilingual-e5-large"
42
+ ENCODER_DEVICE = torch.device("cpu")
43
+
44
+ # Dataset sudah ada di Space → path RELATIF
45
+ SUBJECTS: Dict[str, Dict[str, str]] = {
46
+ "ipas": {
47
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
48
+ "chunks": str(BASE_DIR / "Dataset" / "Ipas" / "Chunk" / "ipas_chunks.json"),
49
+ "embeddings": str(BASE_DIR / "Dataset" / "Ipas" / "Embedd"/ "ipas_embeddings.npy"),
50
+ "label": "IPAS",
51
+ "desc": "Ilmu Pengetahuan Alam dan Sosial"
52
+ },
53
+ "penjas": {
54
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Penjas" / "PENJAS_index.index"),
55
+ "chunks": str(BASE_DIR / "Dataset" / "Penjas" / "Chunk" / "penjas_chunks.json"),
56
+ "embeddings": str(BASE_DIR / "Dataset" / "Penjas" / "Embedd" / "penjas_embeddings.npy"),
57
+ "label": "PJOK",
58
+ "desc": "Pendidikan Jasmani, Olahraga, dan Kesehatan"
59
+ },
60
+ "pancasila": {
61
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Pancasila" / "PANCASILA_index.index"),
62
+ "chunks": str(BASE_DIR / "Dataset" / "Pancasila" / "Chunk" / "pancasila_chunks.json"),
63
+ "embeddings": str(BASE_DIR / "Dataset" / "Pancasila" / "Embedd" / "pancasila_embeddings.npy"),
64
+ "label": "PANCASILA",
65
+ "desc": "Pendidikan Pancasila dan Kewarganegaraan"
66
+ }
67
+ }
68
+
69
+ # Threshold dan fallback
70
+ TOP_K_FAISS = 24
71
+ TOP_K_FINAL = 10
72
+ MIN_COSINE = 0.84
73
+ MIN_HYBRID = 0.15
74
+
75
+ FALLBACK_TEXT = "maap pengetahuan tidak ada dalam database"
76
+ GUARDRAIL_BLOCK_TEXT = "maap, pertanyaan ditolak oleh guardrail"
77
+ ENABLE_PROFILING = False
78
+
79
+ # ========= APP =========
80
+ app = Flask(__name__)
81
+ app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
82
+
83
+ # ========= GLOBAL MODEL =========
84
+ ENCODER_TOKENIZER = None
85
+ ENCODER_MODEL = None
86
+ LLM = None
87
+
88
+ @dataclass(frozen=True)
89
+ class SubjectAssets:
90
+ index: faiss.Index
91
+ texts: List[str]
92
+ embs: np.ndarray
93
+
94
+ # ========= TEKS UTILITAS =========
95
# Indonesian function words ignored when comparing query and sentence vocabulary.
STOPWORDS_ID = {
    "yang", "dan", "atau", "pada", "di", "ke", "dari", "itu", "ini", "adalah", "dengan",
    "untuk", "serta", "sebagai", "oleh", "dalam", "akan", "kamu", "apa", "karena",
    "agar", "sehingga", "terhadap", "dapat", "juga", "para", "diri",
}
# A token is a run of ASCII or Latin-1 accented letters; digits and punctuation split tokens.
TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)


def tok_id(text: str) -> List[str]:
    """Return the lower-cased word tokens of ``text`` with stopwords removed.

    ``None`` or empty input yields an empty list; order and duplicates are kept.
    """
    lowered = (word.lower() for word in TOKEN_RE.findall(text or ""))
    return [word for word in lowered if word not in STOPWORDS_ID]


def lexical_overlap(query: str, sent: str) -> float:
    """Return the Jaccard overlap between the token sets of ``query`` and ``sent``.

    The result is ``0.0`` when either side has no tokens left after stopword
    removal.
    """
    query_terms = set(tok_id(query))
    sent_terms = set(tok_id(sent))
    if not (query_terms and sent_terms):
        return 0.0
    shared = query_terms.intersection(sent_terms)
    combined = query_terms.union(sent_terms)
    return len(shared) / max(1, len(combined))
107
+
108
+ QUESTION_LIKE_RE = re.compile(r"(^\s*(apa|mengapa|bagaimana|sebutkan|jelaskan)\b|[?]$)", re.IGNORECASE)
109
+ INSTRUCTION_RE = re.compile(r"\b(jelaskan|sebutkan|uraikan|kerjakan|diskusikan|tugas|latihan|menurut\s+pendapatmu)\b", re.IGNORECASE)
110
# Lead-in phrases ("based on the text above", "according to the source", ...)
# that should not appear at the start of an answer or a source sentence.
META_PREFIX_PATTERNS = [
    r"berdasarkan\s+(?:kalimat|sumber|teks|konten|informasi)(?:\s+(?:di\s+atas|tersebut))?",
    r"menurut\s+(?:sumber|teks|konten)",
    r"merujuk\s+pada",
    r"mengacu\s+pada",
    r"bersumber\s+dari",
    r"dari\s+(?:kalimat|sumber|teks|konten)",
]
# One lead-in at the very start, plus an optional separator (colon, dash, comma).
META_PREFIX_RE = re.compile(
    r"^\s*(?:" + "|".join(META_PREFIX_PATTERNS) + r")\s*[:\-–—,]?\s*",
    re.IGNORECASE,
)


def clean_prefix(t: str) -> str:
    """Strip leading meta phrases from ``t``.

    The prefix pattern is removed repeatedly (at most five passes) so stacked
    lead-ins are all dropped. ``None`` is treated as an empty string.
    """
    current = (t or "").strip()
    passes_left = 5
    while passes_left > 0:
        passes_left -= 1
        stripped = META_PREFIX_RE.sub("", current).lstrip()
        if stripped == current:
            # Nothing more to remove.
            break
        current = stripped
    return current


def strip_meta_sentence(s: str) -> str:
    """Remove a meta lead-in clause from the beginning of sentence ``s``.

    After the known prefixes are cleaned, a sentence that still opens with a
    meta keyword loses everything up to and including its first punctuation
    mark, unless that would leave nothing.
    """
    sentence = clean_prefix(s or "")
    opens_with_meta = re.match(
        r"^\s*(berdasarkan|menurut|merujuk|mengacu|bersumber|dari)\b",
        sentence,
        re.IGNORECASE,
    )
    if opens_with_meta:
        without_lead = re.sub(r"^\s*[^,.;!?]*[,.;!?]\s*", "", sentence)
        # Keep the sentence as-is when cutting the clause would empty it.
        sentence = clean_prefix(without_lead if without_lead else sentence)
    return sentence.strip()
134
+
135
# Split on whitespace that follows sentence-ending punctuation.
SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")


def split_sentences(text: str) -> List[str]:
    """Split ``text`` into cleaned, statement-like sentences.

    Each piece has meta prefixes removed and gets a trailing period when it
    lacks end punctuation. Pieces that look like questions or exercise
    instructions, or are shorter than 10 characters, are dropped.
    """
    kept: List[str] = []
    for piece in SENT_SPLIT_RE.split(text or ""):
        sentence = clean_prefix((piece or "").strip())
        if not sentence:
            continue
        if not sentence.endswith((".", "!", "?")):
            sentence = sentence + "."
        looks_like_prompt = QUESTION_LIKE_RE.search(sentence) or INSTRUCTION_RE.search(sentence)
        if looks_like_prompt or len(sentence.strip()) < 10:
            continue
        kept.append(sentence)
    return kept
147
+
148
+ # ========= MODEL WARMUP (LAZY) =========
149
import threading

# Serializes first-time model loading: the app is served with several request
# threads, and loading the encoder or the GGUF model twice wastes time and RAM.
_WARMUP_LOCK = threading.Lock()


def warmup_models():
    """Lazily load the sentence encoder and the LLM into module globals.

    Safe to call on every request: once all three globals are set the call
    returns immediately. The first load is guarded by a lock so concurrent
    requests do not each load their own copy of the models.
    """
    global ENCODER_TOKENIZER, ENCODER_MODEL, LLM

    # Fast path: everything is already loaded, no locking needed.
    if ENCODER_TOKENIZER is not None and ENCODER_MODEL is not None and LLM is not None:
        return

    with _WARMUP_LOCK:
        # Re-check under the lock; another thread may have finished loading.
        if ENCODER_TOKENIZER is None or ENCODER_MODEL is None:
            log.info(f"[INIT] Load encoder: {ENCODER_NAME} (CPU)")
            tokenizer = AutoTokenizer.from_pretrained(ENCODER_NAME)
            encoder = AutoModel.from_pretrained(ENCODER_NAME).to(ENCODER_DEVICE).eval()
            # Publish only after both pieces loaded successfully.
            ENCODER_TOKENIZER = tokenizer
            ENCODER_MODEL = encoder
        if LLM is None:
            log.info(f"[INIT] Load LLM: {MODEL_PATH}")
            LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
158
+
159
+ # ========= LOAD ASSETS PER-MAPEL =========
160
@lru_cache(maxsize=8)
def load_subject_assets(subject_key: str) -> SubjectAssets:
    """Load (and memoize) the FAISS index, chunk texts and embeddings of a subject.

    Raises:
        ValueError: ``subject_key`` is not a key of ``SUBJECTS``.
        FileNotFoundError: one of the configured asset files is missing.
        RuntimeError: the index size and the embedding count disagree.
    """
    cfg = SUBJECTS.get(subject_key)
    if cfg is None:
        raise ValueError(f"Unknown subject: {subject_key}")
    log.info(f"[ASSETS] Loading subject={subject_key} | index={cfg['index']}")

    # Fail early, in a fixed order, on the first missing file.
    for asset_kind in ("index", "chunks", "embeddings"):
        asset_path = cfg[asset_kind]
        if not os.path.exists(asset_path):
            raise FileNotFoundError(asset_path)

    faiss_index = faiss.read_index(cfg["index"])
    with open(cfg["chunks"], "r", encoding="utf-8") as chunk_file:
        chunk_records = json.load(chunk_file)
    chunk_texts = [record["text"] for record in chunk_records]
    embeddings = np.load(cfg["embeddings"])

    # Row i of the embeddings must correspond to vector i of the index.
    if faiss_index.ntotal != len(embeddings):
        raise RuntimeError(f"Mismatch ntotal({faiss_index.ntotal}) vs emb({len(embeddings)})")

    return SubjectAssets(index=faiss_index, texts=chunk_texts, embs=embeddings)
178
+
179
+ # ========= ENCODER & RETRIEVAL =========
180
@torch.inference_mode()
def encode_query_exact(text: str) -> np.ndarray:
    """Embed ``text`` with the global encoder and return the vector as NumPy.

    The embedding is the plain mean of ``last_hidden_state`` over dimension 1;
    no attention mask is applied.
    NOTE(review): presumably this mirrors how the stored embeddings were
    produced — confirm before changing the pooling.
    """
    encoded = ENCODER_TOKENIZER(
        text,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(ENCODER_DEVICE)
    model_output = ENCODER_MODEL(**encoded)
    pooled = model_output.last_hidden_state.mean(dim=1)
    return pooled.cpu().numpy()
186
+
187
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are flattened first. A small epsilon in the denominator makes
    a zero vector yield ``0.0`` instead of a division error.
    """
    vec_a = np.asarray(a).reshape(-1)
    vec_b = np.asarray(b).reshape(-1)
    dot_product = np.dot(vec_a, vec_b)
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return float(dot_product / (norm_product + 1e-12))
190
+
191
def best_cosine_from_faiss(query: str, subject_key: str) -> float:
    """Return the highest cosine similarity among the FAISS candidates.

    The index supplies ``TOP_K_FAISS`` candidate ids; each valid id is scored
    against the stored embedding. ``-1.0`` is returned when no candidate id is
    valid.
    """
    assets = load_subject_assets(subject_key)
    query_matrix = encode_query_exact(query)
    _distances, neighbor_ids = assets.index.search(query_matrix, TOP_K_FAISS)
    query_vec = query_matrix.reshape(-1)

    # Start from the floor value so an empty candidate list still yields -1.0.
    similarities = [-1.0]
    text_count = len(assets.texts)
    for row_id in neighbor_ids[0]:
        # Ids outside the text range (e.g. padding ids) are skipped.
        if 0 <= row_id < text_count:
            similarities.append(cosine_sim(query_vec, assets.embs[row_id]))
    return max(similarities)
201
+
202
def retrieve_rerank_cosine(query: str, subject_key: str) -> List[str]:
    """Fetch FAISS candidates and return the best chunks re-ranked by cosine.

    Up to ``TOP_K_FAISS`` candidates are retrieved, re-scored against the
    stored embeddings and the top ``TOP_K_FINAL`` chunk texts are returned,
    best first. An empty list is returned when no candidate id is valid.
    """
    assets = load_subject_assets(subject_key)
    query_matrix = encode_query_exact(query)
    _distances, neighbor_ids = assets.index.search(query_matrix, TOP_K_FAISS)

    text_count = len(assets.texts)
    valid_ids = [row_id for row_id in neighbor_ids[0] if 0 <= row_id < text_count]
    if not valid_ids:
        return []

    query_vec = query_matrix.reshape(-1)
    # (score, id) tuples: sorting descending ranks by score, ties by id.
    scored = [(cosine_sim(query_vec, assets.embs[row_id]), row_id) for row_id in valid_ids]
    ranked = sorted(scored, reverse=True)
    top_texts = [assets.texts[row_id] for _score, row_id in ranked[:TOP_K_FINAL]]
    log.info(f"[RETRIEVE] subject={subject_key} | top={len(top_texts)}")
    return top_texts
215
+
216
def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[str]:
    """Select the ``top_k`` sentences from ``chunks`` most relevant to ``query``.

    Every sentence gets a hybrid score: 0.7 x cosine similarity plus
    0.3 x lexical overlap, minus 0.1 for sentences shorter than 50 characters.
    Sentences scoring below ``MIN_HYBRID`` are discarded.
    """
    if not chunks:
        return []

    query_vec = encode_query_exact(query).reshape(-1)
    scored: List[Tuple[float, str]] = []
    all_sentences = (sentence for chunk in chunks for sentence in split_sentences(chunk))
    for sentence in all_sentences:
        sentence_vec = encode_query_exact(sentence).reshape(-1)
        semantic = cosine_sim(query_vec, sentence_vec)
        lexical = lexical_overlap(query, sentence)
        short_penalty = 0.1 if len(sentence) < 50 else 0.0
        hybrid = 0.7 * semantic + 0.3 * lexical - short_penalty
        if hybrid >= MIN_HYBRID:
            scored.append((hybrid, sentence))

    # Stable sort on the score only, so ties keep their discovery order.
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
    return [sentence for _score, sentence in ranked[:top_k]]
231
+
232
def build_prompt(user_query: str, sentences: List[str]) -> str:
    """Assemble the LLM prompt: rules, source sentences, question, answer cue.

    Each source sentence is cleaned of meta prefixes and rendered as a
    ``- `` bullet.
    """
    bullet_lines = [f"- {clean_prefix(sentence)}" for sentence in sentences]
    block = "\n".join(bullet_lines)
    # Rules for the model; the text stays in Indonesian because it is model input.
    rules = [
        "- Gunakan HANYA daftar kalimat fakta berikut sebagai sumber.",
        "- Jika tidak ada kalimat yang menjawab, balas: maap pengetahuan tidak ada dalam database",
        "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku.",
        "- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', atau 'bersumber'.",
    ]
    system = "\n".join(rules)
    return f"""{system}

KALIMAT SUMBER:
{block}

PERTANYAAN:
{user_query}

JAWAB (1 kalimat saja):
"""
250
+
251
@lru_cache(maxsize=512)
def _validate_input_memo(q: str) -> bool:
    """Memoized guardrail verdict for ``q``; exceptions propagate uncached."""
    return validate_input(q)


def validate_input_cached(q: str) -> bool:
    """Return the guardrail verdict for ``q``, caching successful verdicts only.

    If the guardrail raises, the error is logged and ``False`` is returned so
    the request is blocked, but nothing is cached: the same query is
    re-evaluated next time instead of being rejected forever after a
    transient failure such as a model load error.
    """
    try:
        return _validate_input_memo(q)
    except Exception as e:
        log.exception(f"[GUARDRAIL] error: {e}")
        return False


# Keep the cache-management helpers reachable under the public name.
validate_input_cached.cache_clear = _validate_input_memo.cache_clear
validate_input_cached.cache_info = _validate_input_memo.cache_info
258
+
259
+ # ========= AUTH (POSTGRES) =========
260
+ from werkzeug.security import generate_password_hash, check_password_hash
261
+ from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, func, or_
262
+ from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base
263
+
264
+ POSTGRES_URL = os.environ.get("POSTGRES_URL")
265
+ if not POSTGRES_URL:
266
+ raise RuntimeError("POSTGRES_URL tidak ditemukan. Set di Settings → Variables.")
267
+
268
+ engine = create_engine(POSTGRES_URL, pool_pre_ping=True, future=True, echo=False)
269
+ SessionLocal = scoped_session(sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True))
270
+ Base = declarative_base()
271
+
272
+ class User(Base):
273
+ __tablename__ = "users"
274
+ id = Column(Integer, primary_key=True)
275
+ username = Column(String(50), unique=True, nullable=False, index=True)
276
+ email = Column(String(120), unique=True, nullable=False, index=True)
277
+ password = Column(Text, nullable=False)
278
+ is_active = Column(Boolean, default=True, nullable=False)
279
+ is_admin = Column(Boolean, default=False, nullable=False)
280
+
281
+ class ChatHistory(Base):
282
+ __tablename__ = "chat_history"
283
+ id = Column(Integer, primary_key=True)
284
+ user_id = Column(Integer, nullable=False, index=True)
285
+ subject_key = Column(String(50), nullable=False, index=True)
286
+ role = Column(String(10), nullable=False)
287
+ message = Column(Text, nullable=False)
288
+ timestamp = Column(Integer, server_default=func.extract("epoch", func.now()))
289
+
290
+ Base.metadata.create_all(bind=engine)
291
+
292
+ JKT_TZ = ZoneInfo("Asia/Jakarta")
293
@app.template_filter("fmt_ts")
def fmt_ts(epoch_int: int):
    """Jinja filter: format an epoch-seconds value in the ``JKT_TZ`` timezone.

    Returns a string such as ``"05 Jan 2025 13:45"``, or ``"-"`` when the
    value cannot be converted or formatted.
    """
    try:
        seconds = int(epoch_int)
        return datetime.fromtimestamp(seconds, tz=JKT_TZ).strftime("%d %b %Y %H:%M")
    except Exception:
        # Templates should never break on a missing or malformed timestamp.
        return "-"
300
+
301
+ def db():
302
+ return SessionLocal()
303
+
304
def login_required(view_func):
    """Decorator that redirects anonymous visitors to the login page.

    The wrapped view runs only when ``session["logged_in"]`` is truthy.
    ``functools.wraps`` copies the view's name, docstring and other metadata
    onto the wrapper, so each decorated view keeps its own ``__name__``.
    """
    from functools import wraps  # local import: the file only imports lru_cache

    @wraps(view_func)
    def wrapper(*args, **kwargs):
        if not session.get("logged_in"):
            return redirect(url_for("auth_login"))
        return view_func(*args, **kwargs)

    return wrapper
311
+
312
def admin_required(view_func):
    """Decorator that restricts a view to logged-in administrators.

    Anonymous visitors are redirected to the login page. Logged-in users
    without ``session["is_admin"]`` get an error flash and are redirected to
    the subject list. ``functools.wraps`` preserves the view's name, docstring
    and other metadata on the wrapper.
    """
    from functools import wraps  # local import: the file only imports lru_cache

    @wraps(view_func)
    def wrapper(*args, **kwargs):
        if not session.get("logged_in"):
            return redirect(url_for("auth_login"))
        if not session.get("is_admin"):
            flash("Hanya admin yang boleh mengakses halaman itu.", "error")
            return redirect(url_for("subjects"))
        return view_func(*args, **kwargs)

    return wrapper
322
+
323
+
324
+ # ========= ROUTES =========
325
+ @app.route("/")
326
+ def root():
327
+ return redirect(url_for("auth_login"))
328
+
329
@app.route("/auth/login", methods=["GET", "POST"])
def auth_login():
    """Show the login form (GET) or authenticate a user (POST).

    The submitted identity is matched case-insensitively against username or
    email. On success the session is populated and the user is redirected to
    the subject list; otherwise the form is re-rendered with 400 (missing
    fields) or 401 (bad credentials or inactive account).
    """
    if request.method != "POST":
        return render_template("login.html")

    form = request.form
    identity = (form.get("identity") or "").strip().lower()
    pw_input = (form.get("password") or "").strip()
    if not (identity and pw_input):
        flash("Mohon isi email/username dan password.", "error")
        return render_template("login.html"), 400

    matches_identity = or_(
        func.lower(User.username) == identity,
        func.lower(User.email) == identity,
    )
    s = db()
    try:
        user = s.query(User).filter(matches_identity).first()
        ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
    finally:
        s.close()

    if not ok:
        # Same message for unknown user, wrong password and inactive account.
        flash("Identitas atau password salah.", "error")
        return render_template("login.html"), 401

    session["logged_in"] = True
    session["user_id"] = user.id
    session["username"] = user.username
    session["is_admin"] = bool(user.is_admin)
    return redirect(url_for("subjects"))
357
+
358
@app.route("/auth/register", methods=["GET", "POST"])
def auth_register():
    """Show the registration form (GET) or create a new account (POST).

    Validation failures re-render the form with status 400; a taken username
    or email yields 409. The uniqueness pre-check can race with a concurrent
    registration, so the unique-constraint violation on commit is also
    caught and reported as 409 instead of surfacing as a server error.
    """
    from sqlalchemy.exc import IntegrityError  # local import keeps this block self-contained

    if request.method != "POST":
        return render_template("register.html")

    username = (request.form.get("username") or "").strip().lower()
    email = (request.form.get("email") or "").strip().lower()
    pw = (request.form.get("password") or "").strip()
    confirm = (request.form.get("confirm") or "").strip()

    if not username or not email or not pw:
        flash("Semua field wajib diisi.", "error")
        return render_template("register.html"), 400
    if len(pw) < 6:
        flash("Password minimal 6 karakter.", "error")
        return render_template("register.html"), 400
    if pw != confirm:
        flash("Konfirmasi password tidak cocok.", "error")
        return render_template("register.html"), 400

    s = db()
    try:
        existed = (
            s.query(User)
            .filter(or_(func.lower(User.username) == username,
                        func.lower(User.email) == email))
            .first()
        )
        if existed:
            flash("Username/Email sudah terpakai.", "error")
            return render_template("register.html"), 409
        s.add(User(username=username, email=email,
                   password=generate_password_hash(pw), is_active=True))
        s.commit()
    except IntegrityError:
        # Another request inserted the same username/email after our check.
        s.rollback()
        flash("Username/Email sudah terpakai.", "error")
        return render_template("register.html"), 409
    finally:
        s.close()

    flash("Registrasi berhasil. Silakan login.", "success")
    return redirect(url_for("auth_login"))
392
+
393
+ @app.route("/auth/logout")
394
+ def auth_logout():
395
+ session.clear()
396
+ return redirect(url_for("auth_login"))
397
+
398
+ @app.route("/about")
399
+ def about():
400
+ return render_template("about.html")
401
+
402
+ @app.route("/subjects")
403
+ @login_required
404
+ def subjects():
405
+ return render_template("home.html", subjects=SUBJECTS)
406
+
407
@app.route("/chat/<subject_key>")
@login_required
def chat_subject(subject_key: str):
    """Render the chat page of one subject with the user's stored history.

    Unknown subjects redirect back to the subject list. The chosen subject is
    remembered in the session.
    """
    subject_cfg = SUBJECTS.get(subject_key)
    if subject_cfg is None:
        return redirect(url_for("subjects"))

    session["subject_selected"] = subject_key
    label = subject_cfg["label"]

    s = db()
    try:
        history_query = (
            s.query(ChatHistory)
            .filter_by(user_id=session.get("user_id"), subject_key=subject_key)
            .order_by(ChatHistory.id.asc())
        )
        # Copy into plain dicts so the template does not need a live session.
        history = [{"role": row.role, "message": row.message} for row in history_query.all()]
    finally:
        s.close()

    return render_template("chat.html", subject=subject_key, subject_label=label, history=history)
429
+
430
+ @app.route("/health")
431
+ def health():
432
+ return jsonify({"ok": True, "encoder_loaded": ENCODER_MODEL is not None, "llm_loaded": LLM is not None})
433
+
434
def _save_chat_turn(user_id, subject_key: str, question: str, answer: str) -> None:
    """Store one user/bot exchange; failures are logged and never raised."""
    try:
        s = db()
    except Exception as e:
        # Without a session there is nothing to roll back or close.
        log.exception(f"[DB] gagal simpan chat history: {e}")
        return
    try:
        s.add_all([
            ChatHistory(user_id=user_id, subject_key=subject_key, role="user", message=question),
            ChatHistory(user_id=user_id, subject_key=subject_key, role="bot", message=answer),
        ])
        s.commit()
    except Exception as e:
        s.rollback()
        log.exception(f"[DB] gagal simpan chat history: {e}")
    finally:
        s.close()


@app.route("/ask/<subject_key>", methods=["POST"])
@login_required
def ask(subject_key: str):
    """Answer a question for ``subject_key`` through the RAG pipeline.

    Expects a JSON object with a string ``"message"``. Returns 400 for an
    unknown subject or an empty/invalid message and 500 when the subject
    assets cannot be loaded. Otherwise returns ``{"ok": True, "answer": ...}``,
    where the answer is the guardrail notice, the fallback text, or a
    one-sentence generated answer. Generated turns are stored in the chat
    history exactly as they are returned to the client.
    """
    if subject_key not in SUBJECTS:
        return jsonify({"ok": False, "error": "invalid subject"}), 400

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A JSON array/scalar body has no "message" key; treat it as empty.
        data = {}
    raw_message = data.get("message")
    query = raw_message.strip() if isinstance(raw_message, str) else ""
    if not query:
        return jsonify({"ok": False, "error": "empty query"}), 400

    # Make sure the models are loaded (lazy); done after the cheap checks.
    warmup_models()
    t0 = time.perf_counter()

    if not validate_input_cached(query):
        return jsonify({"ok": True, "answer": GUARDRAIL_BLOCK_TEXT})

    try:
        load_subject_assets(subject_key)
    except Exception as e:
        log.exception(f"[ASSETS] error: {e}")
        return jsonify({"ok": False, "error": f"subject assets error: {e}"}), 500

    best = best_cosine_from_faiss(query, subject_key)
    log.info(f"[RAG] Subject={subject_key.upper()} | Best cosine={best:.3f}")
    if best < MIN_COSINE:
        return jsonify({"ok": True, "answer": FALLBACK_TEXT})

    chunks = retrieve_rerank_cosine(query, subject_key)
    if not chunks:
        return jsonify({"ok": True, "answer": FALLBACK_TEXT})
    sentences = pick_best_sentences(query, chunks, top_k=5)
    if not sentences:
        return jsonify({"ok": True, "answer": FALLBACK_TEXT})

    prompt = build_prompt(query, sentences)

    try:
        answer = generate(
            LLM, prompt,
            max_tokens=64, temperature=0.2, top_p=1.0,
            stop=["\n\n", "\n###", "###", "\nUser:",
                  "Berdasarkan", "berdasarkan", "Menurut", "menurut",
                  "Merujuk", "merujuk", "Mengacu", "mengacu", "Bersumber", "bersumber"]
        ).strip()
    except Exception as e:
        log.exception(f"[LLM] generate error: {e}")
        return jsonify({"ok": True, "answer": FALLBACK_TEXT})

    # Keep only the first sentence, then drop any leftover meta lead-in.
    m = re.search(r"(.+?[.!?])(\s|$)", answer)
    answer = (m.group(1) if m else answer).strip()
    answer = strip_meta_sentence(answer)

    # Apply the fallback BEFORE saving so history matches what the user sees.
    if not answer or len(answer) < 2:
        answer = FALLBACK_TEXT

    _save_chat_turn(session.get("user_id"), subject_key, query, answer)

    if ENABLE_PROFILING:
        log.info({"latency_total": time.perf_counter() - t0, "subject": subject_key, "faiss_best": best})

    return jsonify({"ok": True, "answer": answer})
509
+
510
+ # ===== Admin views & delete actions (tetap) =====
511
+ from sqlalchemy.orm import Session
512
@app.route("/admin")
@admin_required
def admin_dashboard():
    """Render the admin dashboard with user and message counters."""
    s = db()
    try:
        user_count = s.query(func.count(User.id))
        stats = {
            "total_users": user_count.scalar() or 0,
            "total_active": user_count.filter(User.is_active.is_(True)).scalar() or 0,
            "total_admins": user_count.filter(User.is_admin.is_(True)).scalar() or 0,
            "total_msgs": s.query(func.count(ChatHistory.id)).scalar() or 0,
        }
    finally:
        s.close()
    return render_template("admin_dashboard.html", **stats)
528
+
529
@app.route("/admin/users")
@admin_required
def admin_users():
    """List users for admins, with optional search and pagination.

    Query parameters: ``q`` (substring of username/email, case-insensitive),
    ``page`` (>= 1) and ``per_page`` (clamped to 5..100). Non-numeric paging
    values fall back to the defaults instead of raising.
    """
    q = (request.args.get("q") or "").strip().lower()
    # type=int makes the lookup return the default when the value is not an int.
    page = max(request.args.get("page", 1, type=int), 1)
    per_page = min(max(request.args.get("per_page", 20, type=int), 5), 100)

    s = db()
    try:
        base = s.query(User)
        if q:
            base = base.filter(or_(
                func.lower(User.username).like(f"%{q}%"),
                func.lower(User.email).like(f"%{q}%")
            ))
        total = base.count()
        users = (base
                 .order_by(User.id.asc())
                 .offset((page - 1) * per_page)
                 .limit(per_page)
                 .all())
        # [-1] keeps the IN (...) clause non-empty when the page has no users.
        user_ids = [u.id for u in users] or [-1]
        counts = dict(s.query(ChatHistory.user_id, func.count(ChatHistory.id))
                      .filter(ChatHistory.user_id.in_(user_ids))
                      .group_by(ChatHistory.user_id)
                      .all())
    finally:
        s.close()

    return render_template("admin_users.html",
                           users=users, counts=counts,
                           q=q, page=page, per_page=per_page, total=total)
559
+
560
@app.route("/admin/history")
@admin_required
def admin_history():
    """List chat history for admins, with filters and pagination.

    Query parameters: ``q`` (message substring), ``username`` (exact username
    or email), ``subject``, ``role`` (``user``/``bot``), ``page`` (>= 1) and
    ``per_page`` (clamped to 5..200). Non-numeric paging values fall back to
    the defaults instead of raising.
    """
    q = (request.args.get("q") or "").strip().lower()
    username = (request.args.get("username") or "").strip().lower()
    subject = (request.args.get("subject") or "").strip().lower()
    role = (request.args.get("role") or "").strip().lower()
    # type=int makes the lookup return the default when the value is not an int.
    page = max(request.args.get("page", 1, type=int), 1)
    per_page = min(max(request.args.get("per_page", 30, type=int), 5), 200)

    s = db()
    try:
        base = (s.query(ChatHistory, User).join(User, User.id == ChatHistory.user_id))
        if q:
            base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
        if username:
            base = base.filter(or_(
                func.lower(User.username) == username,
                func.lower(User.email) == username
            ))
        if subject:
            base = base.filter(func.lower(ChatHistory.subject_key) == subject)
        if role in ("user", "bot"):
            base = base.filter(ChatHistory.role == role)
        total = base.count()
        rows = (base.order_by(ChatHistory.id.desc())
                .offset((page - 1) * per_page)
                .limit(per_page)
                .all())
    finally:
        s.close()

    # Flatten to plain dicts for the template.
    items = [{
        "id": r.ChatHistory.id,
        "username": r.User.username,
        "email": r.User.email,
        "subject": r.ChatHistory.subject_key,
        "role": r.ChatHistory.role,
        "message": r.ChatHistory.message,
        "timestamp": r.ChatHistory.timestamp,
    } for r in rows]

    return render_template("admin_history.html",
                           items=items, subjects=SUBJECTS,
                           q=q, username=username, subject=subject, role=role,
                           page=page, per_page=per_page, total=total)
606
+
607
+ def _is_last_admin(s: Session) -> bool:
608
+ return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1
609
+
610
@app.route("/admin/users/<int:user_id>/delete", methods=["POST"])
@admin_required
def admin_delete_user(user_id: int):
    """Delete a user together with their chat history (admin only).

    The deletion is refused when the user does not exist, is the admin
    currently logged in, or is the last remaining admin. The outcome is
    reported through a flash message and the client is redirected back.
    """
    s = db()
    try:
        user = s.query(User).filter_by(id=user_id).first()
        if not user:
            refusal = "User tidak ditemukan."
        elif user.id == session.get("user_id"):
            refusal = "Tidak bisa menghapus akun yang sedang login."
        elif user.is_admin and _is_last_admin(s):
            refusal = "Tidak bisa menghapus admin terakhir."
        else:
            refusal = None

        if refusal is not None:
            flash(refusal, "error")
        else:
            # Remove the dependent history rows first, then the user itself.
            history_rows = s.query(ChatHistory).filter(ChatHistory.user_id == user.id)
            history_rows.delete(synchronize_session=False)
            s.delete(user)
            s.commit()
            flash(f"User #{user_id} beserta seluruh riwayatnya telah dihapus.", "success")
    except Exception as e:
        s.rollback()
        log.exception(f"[ADMIN] delete user error: {e}")
        flash("Gagal menghapus user.", "error")
    finally:
        s.close()
    return redirect(request.referrer or url_for("admin_users"))
635
+
636
@app.route("/admin/users/<int:user_id>/history/clear", methods=["POST"])
@admin_required
def admin_clear_user_history(user_id: int):
    """Delete every chat-history row of one user (admin only).

    The outcome, including the number of rows removed, is reported through a
    flash message and the client is redirected back.
    """
    s = db()
    try:
        user_found = s.query(User.id).filter_by(id=user_id).first()
        if user_found:
            user_history = s.query(ChatHistory).filter(ChatHistory.user_id == user_id)
            deleted = user_history.delete(synchronize_session=False)
            s.commit()
            flash(f"Riwayat chat user #{user_id} dihapus ({deleted} baris).", "success")
        else:
            flash("User tidak ditemukan.", "error")
    except Exception as e:
        s.rollback()
        log.exception(f"[ADMIN] clear history error: {e}")
        flash("Gagal menghapus riwayat.", "error")
    finally:
        s.close()
    return redirect(request.referrer or url_for("admin_history"))
654
+
655
@app.route("/admin/history/<int:chat_id>/delete", methods=["POST"])
@admin_required
def admin_delete_chat(chat_id: int):
    """Delete a single chat-history row (admin only).

    The outcome is reported through a flash message and the client is
    redirected back.
    """
    s = db()
    try:
        row = s.query(ChatHistory).filter_by(id=chat_id).first()
        if row:
            s.delete(row)
            s.commit()
            flash(f"Riwayat chat #{chat_id} dihapus.", "success")
        else:
            flash("Baris riwayat tidak ditemukan.", "error")
    except Exception as e:
        s.rollback()
        log.exception(f"[ADMIN] delete chat error: {e}")
        flash("Gagal menghapus riwayat.", "error")
    finally:
        s.close()
    return redirect(request.referrer or url_for("admin_history"))
672
+
673
+ # ========= ENTRY =========
674
+ if __name__ == "__main__":
675
+ port = int(os.environ.get("PORT", 7860))
676
+ app.run(host="0.0.0.0", port=port, debug=False)
app.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: prefetch model assets, then serve the Flask app with gunicorn.
set -e

# prepare_assets.py defaults HF_HOME to <repo>/.hf-cache only inside its own
# process. Export the same default here so the app process reads the
# prefetched cache instead of downloading the guardrail model again.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
export HF_HOME="${HF_HOME:-$SCRIPT_DIR/.hf-cache}"

python prepare_assets.py

# Fall back to 7860 (the default app.py uses) when PORT is unset; quoted to
# avoid word splitting.
exec gunicorn app:app --workers 1 --threads 8 --timeout 180 --bind "0.0.0.0:${PORT:-7860}"
prepare_assets.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prepare_assets.py
2
+ # Download ONLY: GGUF (llama.cpp) + prefetch guardrail XNLI
3
+ import os, shutil
4
+ from pathlib import Path
5
+ from huggingface_hub import hf_hub_download, snapshot_download
6
+
7
+ BASE = Path(__file__).resolve().parent
8
+ os.environ.setdefault("HF_HOME", str(BASE / ".hf-cache")) # cache lokal biar cepat restart
9
+
10
+ GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF")
11
+ GGUF_FILENAME = os.getenv("GGUF_FILENAME", "DeepSeek-R1-0528-Qwen3-8B-Q4_K_M.gguf")
12
+ XNLI_REPO_ID = os.getenv("XNLI_REPO_ID", "joeddav/xlm-roberta-large-xnli")
13
+
14
def ensure_dir(p: Path):
    """Create the parent directory of ``p`` (and any missing ancestors).

    ``p`` itself is not created; an existing directory is left untouched.
    """
    parent_dir = p.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
16
+
17
def main():
    """Fetch the assets the app needs before it starts.

    1. Download the GGUF model into the Hugging Face cache and place a copy
       at ``models/<GGUF_FILENAME>``. The copy is skipped when a file of the
       same size is already there, so restarts do not rewrite the model.
    2. Prefetch the guardrail (XNLI) model into the cache.

    Both steps are best effort: failures are printed as warnings and do not
    abort the script.
    """
    print("=== PREPARE_ASSETS start ===")

    # 1) Download GGUF -> models/
    try:
        target = BASE / "models" / GGUF_FILENAME
        ensure_dir(target)
        local = Path(hf_hub_download(repo_id=GGUF_REPO_ID, filename=GGUF_FILENAME, repo_type="model"))
        # Path.stat() follows symlinks, so the cached file's real size is compared.
        already_copied = target.is_file() and target.stat().st_size == local.stat().st_size
        if already_copied:
            print(f"[OK] GGUF sudah tersedia -> {target}")
        else:
            shutil.copy(local, target)
            print(f"[OK] GGUF -> {target}")
    except Exception as e:
        print(f"[WARN] GGUF download gagal: {e}")

    # 2) Prefetch the guardrail model into the cache (speeds up pipeline creation)
    try:
        snapshot_download(repo_id=XNLI_REPO_ID)  # cache only
        print(f"[OK] Prefetch guardrail: {XNLI_REPO_ID}")
    except Exception as e:
        print(f"[WARN] Prefetch XNLI gagal: {e}")

    print("=== PREPARE_ASSETS done ===")
38
+
39
+ if __name__ == "__main__":
40
+ main()
requerments.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask==3.0.3
2
+ jinja2==3.1.4
3
+ werkzeug==3.0.3
4
+ python-dotenv==1.0.1
5
+
6
+ sqlalchemy==2.0.36
7
+ psycopg2-binary==2.9.9
8
+
9
+ numpy==1.26.4
10
+ faiss-cpu==1.8.0
11
+ scikit-learn==1.5.2
12
+
13
+ torch==2.4.1
14
+ transformers==4.44.2
15
+ huggingface_hub==0.26.2
16
+
17
+ llama-cpp-python==0.3.4
18
+ gunicorn==21.2.0
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.11