Update app.py
app.py CHANGED
@@ -7,6 +7,8 @@ import torch
 import numpy as np
 from collections import Counter
 import os
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
 # Add Arabic stop words
 ARABIC_STOP_WORDS = {
     'في', 'من', 'إلى', 'على', 'عن', 'مع', 'خلال', 'حتى', 'إذا', 'ثم',
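Note: wordcloud and matplotlib are new runtime dependencies. Assuming this Space installs packages from a requirements.txt (that file is not part of this diff), both would need to be declared there, e.g.:

wordcloud
matplotlib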
@@ -69,6 +71,21 @@ def split_text(text, max_length=512):
 
     return chunks
 
+def create_arabic_wordcloud(text, title):
+    wordcloud = WordCloud(
+        width=1200,
+        height=600,
+        background_color='white',
+        font_path='arial',  # must resolve to an installed font file that includes Arabic glyphs
+        max_words=200
+    ).generate(text)
+
+    fig, ax = plt.subplots(figsize=(15, 8))
+    ax.imshow(wordcloud, interpolation='bilinear')
+    ax.axis('off')
+    ax.set_title(title, fontsize=16, pad=20)
+    return fig
+
 def clean_arabic_text(text):
     """Clean Arabic text by removing stop words and normalizing."""
     words = text.split()
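Note: matplotlib and WordCloud typically draw raw Arabic strings with disconnected letters in left-to-right order. A minimal pre-shaping sketch, assuming the optional arabic-reshaper and python-bidi packages are installed (neither appears in this diff):

import arabic_reshaper
from bidi.algorithm import get_display

def shape_arabic(text):
    # Join letters into their contextual forms, then apply the bidi
    # algorithm so the string displays right-to-left.
    return get_display(arabic_reshaper.reshape(text))

Feeding shape_arabic(text) into WordCloud(...).generate(...) instead of the raw string would avoid the broken rendering.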
@@ -202,18 +219,22 @@ def format_emotions(emotion_counts):
         })
     return formatted_emotions
 
-def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=
+def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=1):
     """Process the data and generate summaries with flexible topic configuration."""
     summaries = []
 
     topic_model_params = {
         "language": "arabic",
         "calculate_probabilities": True,
-        "min_topic_size":
+        "min_topic_size": 1,
         "n_gram_range": (1, 2),
         "top_n_words": 15,
-        "verbose": True
+        "verbose": True,
+        "seed_topic_list": None
     }
+    st.write(f"Total documents: {len(df)}")
+    st.write(f"Topic strategy: {topic_strategy}")
+    st.write(f"Min topic size: {min_topic_size}")
 
     if topic_strategy == "Manual":
         topic_model_params["nr_topics"] = n_topics
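Note: the site where topic_model_params is consumed falls outside this diff. Every key, including the new seed_topic_list, matches a keyword argument of BERTopic's constructor, so presumably the dict is unpacked along these lines:

from bertopic import BERTopic

# Sketch only; the actual construction call in app.py is not shown in this diff.
topic_model = BERTopic(**topic_model_params)
# Caveat: HDBSCAN, BERTopic's default clusterer, expects a minimum cluster
# size of at least 2, so "min_topic_size": 1 may fail at fit time.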
@@ -250,7 +271,8 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
 
     try:
         topics, probs = topic_model.fit_transform(texts, embeddings)
-
+        st.write(f"Number of unique topics: {len(set(topics))}")
+        st.write(f"Topic distribution: {Counter(topics)}")
         topic_counts = Counter(topics)
         if -1 in topic_counts:
             del topic_counts[-1]
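Note: the two new st.write calls render into the app for every user on every rerun. If they are meant as diagnostics, a collapsible container keeps the main view clean; a sketch using Streamlit's standard st.expander (labels mirror the diff above):

with st.expander("Topic modeling diagnostics"):
    st.write(f"Number of unique topics: {len(set(topics))}")
    st.write(f"Topic distribution: {Counter(topics)}")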
@@ -385,6 +407,12 @@ if uploaded_file is not None:
                 st.subheader("Emotions")
                 for emotion in summary['top_emotions']:
                     st.write(f"• {emotion['emotion']}: {emotion['count']} poems")
+
+                st.subheader("Word Cloud Visualization")
+                country_poems = df[df['country'] == summary['country']]['poem']
+                combined_text = ' '.join(country_poems)
+                wordcloud_fig = create_arabic_wordcloud(combined_text, f"Most Common Words in {summary['country']} Poems")
+                st.pyplot(wordcloud_fig)
 
         with tab2:
             st.subheader("Global Topic Distribution")