Merge branch 'main' of hf.co:spaces/Awlly/NLP_app
Changed files:
- README.md +79 -0
- __pycache__/preprocessing.cpython-310.pyc +0 -0
- app_models/__pycache__/bag_of_words_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/gpt_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/lstm_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/rubert_MODEL.cpython-310.pyc +0 -0
- app_models/__pycache__/toxicity_MODEL.cpython-310.pyc +0 -0
- app_models/gpt_MODEL.py +2 -2
- app_pages/__pycache__/page1_model_comparison.cpython-310.pyc +0 -0
- app_pages/__pycache__/page2_rubert_toxicity.cpython-310.pyc +0 -0
- app_pages/__pycache__/page3_gpt_model.cpython-310.pyc +0 -0
- app_pages/page1_model_comparison.py +29 -7
- app_pages/page3_gpt_model.py +3 -2
README.md
CHANGED
@@ -10,3 +10,82 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+# NLP App: a Streamlit app for natural language processing 💡
+Elbrus Bootcamp | Phase-2 | Team Project
+
+## Team 🧑🏻‍💻
+1. [Awlly](https://github.com/Awlly)
+2. [sakoser](https://github.com/sakoser)
+3. [whoisida](https://github.com/whoisida)
+
+## Task 📌
+Create a service that classifies movie reviews into good, neutral, and bad categories; a service that classifies user input as toxic or non-toxic; and a GPT-2-based text generation service trained to emulate a particular author's writing.
+
+## Contents 📝
+1. Movie review classification using LSTM, ruBERT, and BoW 💨 [Dataset](https://drive.google.com/file/d/1c92sz81bEfOw-rutglKpmKGm6rySmYbt/view?usp=sharing)
+2. Toxicity classification of user input using ruBert-tiny-toxicity 📑 [Dataset](https://drive.google.com/file/d/1O7orH9CrNEhnbnA5KjXji8sgrn6iD5n-/view?usp=drive_link)
+3. GPT-2-based text generation service
+
+## Deployment 🎈
+The service is deployed on [Hugging Face](https://huggingface.co/spaces/Awlly/NLP_app)
+
+## Libraries 📖
+```python
+import os
+import re
+import string
+import unicodedata
+from dataclasses import dataclass
+from typing import Tuple
+
+import joblib
+import matplotlib.pyplot as plt
+import nltk
+import numpy as np
+import streamlit as st
+from tqdm import tqdm
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torchutils as tu
+from torch.utils.data import DataLoader, TensorDataset, Dataset, random_split
+from torchvision import datasets, transforms as T
+from torchvision.datasets import ImageFolder
+from torchvision.io import read_image
+
+from sklearn.linear_model import LogisticRegression
+from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+import pydensecrf.densecrf as dcrf
+import pydensecrf.utils as dcrf_utils
+
+from preprocessing import data_preprocessing, preprocess_single_string
+```
+
+## Guide 📜
+#### How to run locally?
+
+1. To create a Python virtual environment for running the code, enter:
+
+   ``python3 -m venv my-env``
+
+2. Activate the new environment:
+
+   * Windows: ``my-env\Scripts\activate.bat``
+   * macOS and Linux: ``source my-env/bin/activate``
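Note: the committed guide stops after environment activation. The remaining steps are presumably the usual Streamlit pair, assuming the Space's standard layout with a ``requirements.txt`` and a top-level ``app.py`` (neither is shown in this commit): ``pip install -r requirements.txt``, then ``streamlit run app.py``.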
__pycache__/preprocessing.cpython-310.pyc DELETED (binary file, 2.32 kB)
app_models/__pycache__/bag_of_words_MODEL.cpython-310.pyc DELETED (binary file, 630 Bytes)
app_models/__pycache__/gpt_MODEL.cpython-310.pyc DELETED (binary file, 1.08 kB)
app_models/__pycache__/lstm_MODEL.cpython-310.pyc DELETED (binary file, 3.49 kB)
app_models/__pycache__/rubert_MODEL.cpython-310.pyc DELETED (binary file, 1.43 kB)
app_models/__pycache__/toxicity_MODEL.cpython-310.pyc DELETED (binary file, 985 Bytes)
app_models/gpt_MODEL.py
CHANGED
@@ -10,7 +10,7 @@ model = GPT2LMHeadModel.from_pretrained(model_path)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
-def generate_text(prompt_text, length, temperature):
+def generate_text(prompt_text, length, temperature, beams):
     encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
     encoded_prompt = encoded_prompt.to(device)
@@ -22,7 +22,7 @@ def generate_text(prompt_text, length, temperature):
         top_p=0.9,
         repetition_penalty=1.2,
         do_sample=True,
-        num_return_sequences=
+        num_return_sequences=beams,
     )
 
     # Decode the generated text
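For context, a minimal sketch of how the updated `generate_text` plausibly reads in full. Only the signature change and `num_return_sequences=beams` are confirmed by this diff; the shape of the `model.generate` call and the decoding step are assumptions based on the visible context lines:

```python
def generate_text(prompt_text, length, temperature, beams):
    # Encode the prompt and move it to the model's device
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
    encoded_prompt = encoded_prompt.to(device)

    # Draw `beams` independent samples of up to `length` tokens each
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=length,
        temperature=temperature,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        num_return_sequences=beams,
    )

    # Decode the generated token ids; joining the samples into one string
    # keeps a caller that expects a single text working (an assumption)
    return "\n\n".join(
        tokenizer.decode(seq, skip_special_tokens=True) for seq in output_sequences
    )
```

One naming nit: with `do_sample=True`, `num_return_sequences` yields independent samples, not beam search, so `beams` is a slightly misleading name; true beam search would pass `num_beams` instead.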
app_pages/__pycache__/page1_model_comparison.cpython-310.pyc DELETED (binary file, 904 Bytes)
app_pages/__pycache__/page2_rubert_toxicity.cpython-310.pyc DELETED (binary file, 794 Bytes)
app_pages/__pycache__/page3_gpt_model.cpython-310.pyc DELETED (binary file, 845 Bytes)
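All six deleted files are compiled Python caches that had been committed; adding ``__pycache__/`` (or ``*.pyc``) to the repo's ``.gitignore`` would keep them from being re-added.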
app_pages/page1_model_comparison.py
CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
 from app_models.rubert_MODEL import classify_text
 from app_models.bag_of_words_MODEL import predict
 from app_models.lstm_MODEL import predict_review
+import time
 
 class_prefix = 'This review is likely...'
 
@@ -11,11 +12,32 @@ def run():
 
     # Example placeholder for user input
     user_input = st.text_area("")
 
+    if st.button('Classify with All Models'):
+        # Measure and display Bag of Words/TF-IDF prediction time
+        start_time = time.time()
+        bow_tfidf_result = predict(user_input)
+        end_time = time.time()
+        st.write(f'{class_prefix} {bow_tfidf_result} according to Bag of Words/TF-IDF. Time taken: {end_time - start_time:.2f} seconds.')
+
+        # Measure and display LSTM prediction time
+        start_time = time.time()
+        lstm_result = predict_review(user_input)
+        end_time = time.time()
+        st.write(f'{class_prefix} {lstm_result} according to LSTM. Time taken: {end_time - start_time:.2f} seconds.')
+
+        # Measure and display ruBERT prediction time
+        start_time = time.time()
+        rubert_result = classify_text(user_input)
+        end_time = time.time()
+        st.write(f'{class_prefix} {rubert_result} according to ruBERT. Time taken: {end_time - start_time:.2f} seconds.')
+
     # Placeholder buttons for model selection
-    if st.button('Classify with BoW/TF-IDF'):
-        st.write(f'{class_prefix}{predict(user_input)}')
-    if st.button('Classify with LSTM'):
-        st.write(f'{class_prefix}{predict_review(user_input)}')
-    if st.button('Classify with ruBERT'):
-        st.write(f'{class_prefix}{classify_text(user_input)}')
+    # if st.button('Classify with BoW/TF-IDF'):
+    #     st.write(f'{class_prefix}{predict(user_input)}')
+    # if st.button('Classify with LSTM'):
+    #     st.write(f'{class_prefix}{predict_review(user_input)}')
+    # if st.button('Classify with ruBERT'):
+    #     st.write(f'{class_prefix}{classify_text(user_input)}')
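The three measure-and-report blocks repeat one pattern; a small helper would keep the timing logic and format strings in sync. A sketch of such a refactor (not part of this commit; `classify_and_time` is a hypothetical name, and `st`, `class_prefix`, and the three predict functions come from the module above):

```python
import time

def classify_and_time(label, classify_fn, text):
    # Run one classifier and report its verdict plus wall-clock latency
    start = time.time()
    result = classify_fn(text)
    elapsed = time.time() - start
    st.write(f'{class_prefix} {result} according to {label}. Time taken: {elapsed:.2f} seconds.')

if st.button('Classify with All Models'):
    for label, fn in [('Bag of Words/TF-IDF', predict),
                      ('LSTM', predict_review),
                      ('ruBERT', classify_text)]:
        classify_and_time(label, fn, user_input)
```

For short runs, `time.perf_counter()` would be a more precise timer than `time.time()`.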
app_pages/page3_gpt_model.py
CHANGED
@@ -6,9 +6,10 @@ def run():
     st.title('GPT Text Generation')
     prompt_text = st.text_area("Input Text", "Type here...")
     length = st.slider("Length of Generated Text", min_value=50, max_value=500, value=200)
-    temperature = st.slider("Temperature", min_value=0.1, max_value=
+    temperature = st.slider("Temperature", min_value=0.1, max_value=2.0, value=0.7, step=0.1)
+    beams = st.slider("Number of Generations", min_value=2, max_value=10, value=4, step=1)
 
     if st.button('Generate Text'):
         with st.spinner('Generating...'):
-            generated_text = generate_text(prompt_text, length, temperature)
+            generated_text = generate_text(prompt_text, length, temperature, beams)
             st.text_area("Generated Text", generated_text, height=250)
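Since `beams` now requests several generations, note that the page still renders a single text area. If `generate_text` returns the samples as a list rather than one joined string (this diff does not show its return value), the display would need a loop; a sketch under that assumption:

```python
for i, sample in enumerate(generated_text, start=1):
    st.text_area(f"Generated Text {i}", sample, height=250)
```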