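# Streamlit dashboard for sentiment analysis of Indonesian-language tweets.
# Tweets are classified with an IndoBERT-based model and can be pulled from the
# Twitter API v2 recent-search endpoint or uploaded as a CSV file.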
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import requests
import re
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
# ---------------------- Initial Setup ----------------------
st.set_page_config(page_title="Analisis Opini Twitter Indonesia", layout="wide")
# Load the IndoBERT sentiment classifier
@st.cache_resource
def load_model():
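    # The tokenizer is loaded from the base IndoBERT checkpoint that the
    # classifier below is reportedly fine-tuned from; if the classifier repo
    # ships its own tokenizer files, loading those instead is the safer choice.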
    tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p1")
    model = AutoModelForSequenceClassification.from_pretrained("mdhugol/indonesia-bert-sentiment-classification")
    return tokenizer, model
tokenizer, model = load_model()
label_map = {0: 'Negatif', 1: 'Netral', 2: 'Positif'}
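# The index-to-label order above should match the classifier's id2label mapping;
# check the model card and reorder these labels if they differ.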
# Header
st.title("🇮🇩 Dashboard Analisis Sentimen X")
st.markdown("""
Aplikasi ini menggunakan model IndoBERT untuk menganalisis sentimen tweet berbahasa Indonesia dari Twitter atau file CSV.
""")
# Sidebar
st.sidebar.header("Konfigurasi")
twitter_auth_token = st.sidebar.text_input("Twitter Bearer Token:", type="password")
mode = st.sidebar.radio("Pilih Mode Data:", ["Ambil dari Twitter", "Upload File CSV"])
# Analysis functions
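# Classify every row of df['content'] with the IndoBERT model and append the
# predicted sentiment as a new 'label' column; st.cache_data memoizes the
# result so identical inputs are not re-scored on every rerun.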
@st.cache_data
def analisis_sentimen_indobert(df):
    results = []
    for text in df['content']:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        probs = softmax(logits.numpy()[0])
        label_idx = probs.argmax()
        results.append(label_map[label_idx])
    df['label'] = results
    return df
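# Render the analysis results: a data preview, the sentiment distribution,
# a word cloud of all tweet text, and a CSV download of the labelled data.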
def tampilkan_hasil(df):
st.write("### Contoh Data", df.head())
st.write("### Distribusi Sentimen")
st.bar_chart(df['label'].value_counts())
st.write("### WordCloud")
all_text = " ".join(df['content'])
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_text)
fig, ax = plt.subplots()
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
csv = df.to_csv(index=False).encode('utf-8')
st.download_button(
label="📥 Download Hasil CSV",
data=csv,
file_name='hasil_sentimen_indonesia.csv',
mime='text/csv',
)
# ---------------------- Twitter API Mode ----------------------
if mode == "Ambil dari Twitter":
    keyword = st.text_input("Masukkan Kata Kunci:")
    jumlah = st.slider("Jumlah Tweet:", 10, 100, 30)
    if st.button("Ambil dan Analisis Tweet"):
        if not twitter_auth_token:
            st.error("Silakan masukkan Bearer Token.")
        elif not keyword:
            st.warning("Masukkan kata kunci terlebih dahulu.")
        elif any(ord(c) > 127 for c in keyword):
            st.error("Kata kunci tidak boleh mengandung emoji atau karakter non-ASCII.")
        else:
            with st.spinner("Mengambil tweet..."):
                headers = {
                    "Authorization": f"Bearer {twitter_auth_token}",
                    "User-Agent": "StreamlitApp"
                }
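                # Strip punctuation from the keyword and query the v2 recent-search
                # endpoint for Indonesian tweets (lang:id). Passing the query through
                # `params` lets requests handle URL encoding; max_results must stay in
                # the API's 10-100 range, which the slider already enforces.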
                query = re.sub(r'[^\w\s]', '', keyword)
                url = "https://api.twitter.com/2/tweets/search/recent"
                params = {"query": f"{query} lang:id", "max_results": jumlah, "tweet.fields": "created_at,text"}
                try:
                    response = requests.get(url, headers=headers, params=params)
                    if response.status_code == 200:
                        tweets = response.json().get("data", [])
                        if tweets:
                            df = pd.DataFrame(tweets)
                            df.rename(columns={"created_at": "date", "text": "content"}, inplace=True)
                            df = analisis_sentimen_indobert(df)
                            tampilkan_hasil(df)
                        else:
                            st.warning("Tidak ada tweet ditemukan.")
                    elif response.status_code == 401:
                        st.error("Token tidak valid atau tidak memiliki izin.")
                    elif response.status_code == 429:
                        st.error("Terlalu banyak permintaan. Tunggu beberapa saat dan coba lagi.")
                    else:
                        st.error(f"Gagal mengambil data. Status: {response.status_code}")
                except Exception as e:
                    st.error(f"Terjadi kesalahan: {e}")
# ---------------------- CSV Upload Mode ----------------------
elif mode == "Upload File CSV":
    uploaded_file = st.file_uploader("Unggah file CSV dengan kolom 'content'", type="csv")
    if uploaded_file:
        try:
            df = pd.read_csv(uploaded_file)
            if 'content' not in df.columns:
                st.error("Kolom 'content' tidak ditemukan.")
            else:
                df = analisis_sentimen_indobert(df)
                tampilkan_hasil(df)
        except Exception as e:
            st.error(f"Gagal membaca file: {e}")