Famazo committed
Commit 4d8d6f6 · 1 Parent(s): 36b57cc

Upload 9 files

backend/__pycache__/api.cpython-310.pyc ADDED
Binary file (2.77 kB).
 
backend/api.py ADDED
@@ -0,0 +1,74 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+ import pandas as pd
+ from pathlib import Path
+
+ # === 1. API and CORS setup ===
+ app = FastAPI()
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # === 2. Global Setup (Model, Tokenizer, Data) ===
+ BASE_DIR = Path(__file__).resolve().parent
+ MODEL_DIR = BASE_DIR / "bert_chatbot_model"  # a directory, not an .onnx file
+ DATASET_PATH = BASE_DIR / "dataset_chatbot_template.xlsx"
+
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(str(MODEL_DIR))
+     model = AutoModelForSequenceClassification.from_pretrained(str(MODEL_DIR))
+     df_jawaban = pd.read_excel(DATASET_PATH)  # answer lookup table (Intent -> Jawaban_ID)
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.to(device)
+     model.eval()
+ except Exception as e:
+     print(f"❌ FATAL ERROR: {e}")
+
+ responses = {
+     "about_me": "I am a passionate developer specializing in AI and web development.",
+     "skills": "My main skills are HTML5, CSS3, JavaScript, Laravel, Node.js, Database, TensorFlow, PyTorch, Firebase, and Jupyter Notebook.",
+     "projects": "Some of my projects are Mobile Apps Bald Detection and Jupyter Notebook Bald Detection.",
+     "experience": "I have worked as IT Support, AI Engineer, and Freelancer on multiple projects.",
+     "career_goal": "My career goal is to become a Full Stack Developer and Machine Learning Engineer.",
+     "greeting": "Hello! How can I help you regarding this portfolio?",
+     "fallback": "I'm sorry, I don't understand. Please ask another question."
+ }
+
+ class ChatRequest(BaseModel):
+     text: str
+
+ @app.get("/")
+ async def root():
+     return {"message": "🚀 Chatbot API running on Hugging Face"}
+
+ @app.post("/chatbot")
+ async def chat(req: ChatRequest):
+     if 'model' not in globals():  # model failed to load at startup
+         return {"response": responses["fallback"], "intent": "error_loading"}
+
+     try:
+         inputs = tokenizer(req.text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
+         with torch.no_grad():
+             outputs = model(**inputs)
+         pred_id = torch.argmax(outputs.logits, dim=1).item()
+
+         intent = model.config.id2label.get(pred_id, "fallback")
+
+         try:
+             jawaban = df_jawaban.loc[df_jawaban['Intent'] == intent, 'Jawaban_ID'].iloc[0]
+         except IndexError:
+             jawaban = responses.get(intent, responses["fallback"])
+
+         return {"intent": intent, "response": jawaban}
+
+     except Exception as e:
+         print(f"❌ Runtime Error: {e}")
+         return {"response": "Internal server error"}
backend/dataset_chatbot_template.xlsx ADDED
Binary file (11 kB).
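
The spreadsheet itself is binary in this diff, but api.py reads it with pd.read_excel and looks up answers by the Intent and Jawaban_ID columns, so a compatible file can be sketched like this (the example rows are illustrative, not the shipped data):

import pandas as pd

# Schema expected by api.py: one row per intent, answer text in Jawaban_ID.
df = pd.DataFrame({
    "Intent": ["greeting", "skills"],
    "Jawaban_ID": [
        "Hello! How can I help you regarding this portfolio?",
        "My main skills are HTML5, CSS3, JavaScript, ...",
    ],
})
df.to_excel("dataset_chatbot_template.xlsx", index=False)  # requires openpyxl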
 
backend/models/bert-base-multilingual-cased/config.json ADDED
@@ -0,0 +1,49 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "about_me",
+     "1": "career_goal",
+     "2": "experience",
+     "3": "fallback",
+     "4": "greeting",
+     "5": "projects",
+     "6": "skills"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "about_me": 0,
+     "career_goal": 1,
+     "experience": 2,
+     "fallback": 3,
+     "greeting": 4,
+     "projects": 5,
+     "skills": 6
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "transformers_version": "4.56.1",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 119547
+ }
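
The id2label map above is what api.py uses to turn a predicted class index into an intent name. A sketch for sanity-checking the mapping without loading the model weights (the directory path is taken from this commit):

from transformers import AutoConfig

# Reads only config.json; no weights are downloaded or loaded.
config = AutoConfig.from_pretrained("backend/models/bert-base-multilingual-cased")
print(config.id2label)    # {0: 'about_me', 1: 'career_goal', ..., 6: 'skills'}
print(config.num_labels)  # 7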
backend/models/bert-base-multilingual-cased/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
backend/models/bert-base-multilingual-cased/tokenizer.json ADDED
The diff for this file is too large to render.
 
backend/models/bert-base-multilingual-cased/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
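
Note that do_lower_case is false (the cased checkpoint) and model_max_length is 512, while api.py truncates requests at max_length=128. A sketch to confirm the files added above load as a working tokenizer (path taken from this commit):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("backend/models/bert-base-multilingual-cased")
print(tok.model_max_length)  # 512
enc = tok("Hello World", truncation=True, max_length=128)
# Expect [CLS] ... [SEP] wrapping the cased subword tokens.
print(tok.convert_ids_to_tokens(enc["input_ids"]))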
backend/models/bert-base-multilingual-cased/vocab.txt ADDED
The diff for this file is too large to render.