hotel-analysis

Sleeping

App Files Files Community

redfernstech committed on Oct 28, 2024

Commit

9576521

verified ·

1 Parent(s): ae6a156

Update app.py

Browse files

Files changed (1) hide show

app.py +236 -88

app.py CHANGED Viewed

@@ -1,89 +1,237 @@
 import streamlit as st
-from streamlit_image_comparison import image_comparison
-IMAGE_TO_URL = {
-    "sample_image_1": "https://user-images.githubusercontent.com/34196005/143309873-c0c1f31c-c42e-4a36-834e-da0a2336bb19.jpg",
-    "sample_image_2": "https://user-images.githubusercontent.com/34196005/143309867-42841f5a-9181-4d22-b570-65f90f2da231.jpg",
-}
-st.set_page_config(
-    page_title="Streamlit Image Comparison",
-    page_icon="🔥",
-    layout="centered",
-    initial_sidebar_state="auto",
-)
-st.markdown(
-    """
-    <h2 style='text-align: center'>
-    Streamlit Image Comparison Demo
-    </h2>
-    """,
-    unsafe_allow_html=True,
-)
-st.markdown(
-    """
-    <p style='text-align: center'>
-    <a href='https://github.com/fcakyon/streamlit-image-comparison' target='_blank'>https://github.com/fcakyon/streamlit-image-comparison</a>
-    <br />
-    Follow me for more! <a href='https://twitter.com/fcakyon' target='_blank'> <img src="https://img.icons8.com/color/48/000000/twitter--v1.png" height="30"></a><a href='https://github.com/fcakyon' target='_blank'><img src="https://img.icons8.com/fluency/48/000000/github.png" height="27"></a><a href='https://www.linkedin.com/in/fcakyon/' target='_blank'><img src="https://img.icons8.com/fluency/48/000000/linkedin.png" height="30"></a> <a href='https://fcakyon.medium.com/' target='_blank'><img src="https://img.icons8.com/ios-filled/48/000000/medium-monogram.png" height="26"></a>
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-st.write("##")
-with st.form(key="Streamlit Image Comparison"):
-    # image one inputs
-    col1, col2 = st.columns([3, 1])
-    with col1:
-        img1_url = st.text_input("Image one URL:", value=IMAGE_TO_URL["sample_image_1"])
-    with col2:
-        img1_text = st.text_input("Image one text:", value="YOLOX")
-    # image two inputs
-    col1, col2 = st.columns([3, 1])
-    with col1:
-        img2_url = st.text_input("Image two URL:", value=IMAGE_TO_URL["sample_image_2"])
-    with col2:
-        img2_text = st.text_input("Image two text:", value="SAHI+YOLOX")
-    # continious parameters
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        starting_position = st.slider(
-            "Starting position of the slider:", min_value=0, max_value=100, value=50
-        )
-    with col2:
-        width = st.slider(
-            "Component width:", min_value=400, max_value=1000, value=700, step=100
-        )
-    # boolean parameters
-    col1, col2, col3, col4 = st.columns([1, 3, 3, 3])
-    with col2:
-        show_labels = st.checkbox("Show labels", value=True)
-    with col3:
-        make_responsive = st.checkbox("Make responsive", value=True)
-    with col4:
-        in_memory = st.checkbox("In memory", value=True)
-    # centered submit button
-    col1, col2, col3 = st.columns([6, 4, 6])
-    with col2:
-        submit = st.form_submit_button("Update Render 🔥")
-static_component = image_comparison(
-    img1=img1_url,
-    img2=img2_url,
-    label1=img1_text,
-    label2=img2_text,
-    width=width,
-    starting_position=starting_position,
-    show_labels=show_labels,
-    make_responsive=make_responsive,
-    in_memory=in_memory,
-)

 import streamlit as st
+import pandas as pd
+import os
+import re
+import preprocessor as p
+import joblib
+import base64
# Markdown shown on the "Home" page. It is passed verbatim to st.markdown(),
# so headings and bullet lists below are rendered as formatted text.
project_description = """
# Hotel Data Analysis Project
## Overview
I have completed a hotel data analysis project using an instant web scraper.
This project involved scraping hotel data and hotel reviews separately, cleaning the data,
concatenating it, and performing sentiment analysis on the DataFrame.
Additionally, I clustered the hotel reviews, applied sentiment analysis, and passed
those clusters to an LLM (Large Language Model) to extract strengths and weaknesses of hotels.
## Steps
### 1. Scraping Hotel Data
- Utilized an instant web scraper to collect hotel data.
- Scraped hotel data separately from hotel reviews.
### 2. Data Collection
- Collected hotel data and hotel reviews data separately for each hotel.
### 3. Data Cleaning
- Cleaned the collected data to remove any inconsistencies or errors.
- Applied preprocessing techniques to prepare the data for analysis.
### 4. Data Concatenation
- Concatenated the cleaned hotel data and hotel reviews data to create a unified dataset for analysis.
### 5. Sentiment Analysis
- Performed sentiment analysis on the concatenated DataFrame.
- Utilized the results to understand the overall sentiment of hotel reviews.
### 6. Clustering Hotel Reviews
- Clustered the hotel reviews based on their content to identify patterns and similarities.
### 7. Extracting Strengths and Weaknesses
- Passed the clustered reviews to an LLM (Large Language Model) to extract strengths and weaknesses of hotels.
- Used the extracted information to gain insights into customer perceptions.
## Conclusion
This project demonstrates the use of web scraping, data cleaning, sentiment analysis, and clustering techniques to analyze hotel data.
The extracted strengths and weaknesses provide valuable insights for hotel management to improve customer satisfaction and service quality.
"""
def create_download_link(df, filename):
    """Return an HTML anchor that downloads ``df`` as a CSV file.

    The CSV text is embedded in the link itself as a base64 ``data:`` URI,
    so no temporary file or extra endpoint is needed.

    Args:
        df: The pandas DataFrame to export (written without the index).
        filename: Download name without extension; ``.csv`` is appended.

    Returns:
        A string containing a single ``<a>`` element.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    # The name lands inside a double-quoted HTML attribute, so quotes and
    # angle brackets must be escaped to keep the markup well-formed.
    safe_name = escape(str(filename), quote=True)
    # "text/csv" is the registered media type for CSV; the charset tells the
    # browser how the base64 payload was encoded.
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        f'download="{safe_name}.csv">Download CSV file</a>'
    )
# Path to the directory containing CSV files
directory_path = r'hotel reviews'
# List the CSV files in that directory. The listing is sorted so the order
# shown in the UI is stable across platforms, and a missing directory yields
# an empty list instead of raising FileNotFoundError at import time.
csv_files = (
    sorted(name for name in os.listdir(directory_path) if name.endswith('.csv'))
    if os.path.isdir(directory_path)
    else []
)
# Function to concatenate selected columns
def concatenate_columns(df, selected_columns):
    """Stack the values of the selected columns into one long column.

    The values of the first selected column come first, followed by the
    values of the next one, and so on. Any number of columns is accepted;
    an empty selection yields an empty result.

    Args:
        df: Source pandas DataFrame.
        selected_columns: Iterable of column labels present in ``df``.

    Returns:
        A new DataFrame with a single ``ConcatenatedData`` column.

    Raises:
        KeyError: If a selected label is not a column of ``df``.
    """
    concatenated_data = [
        value
        for column in selected_columns
        for value in df[column].tolist()
    ]
    return pd.DataFrame({'ConcatenatedData': concatenated_data})
# Function to display selected dataset
def display_selected_dataset(selected_dataset):
    """Read one CSV from the reviews directory and render it on the page.

    Args:
        selected_dataset: File name of the CSV, relative to ``directory_path``.
    """
    # Heading first, then the table, matching the on-screen order.
    reviews_frame = pd.read_csv(os.path.join(directory_path, selected_dataset))
    heading = f'Dataset: {selected_dataset}'
    st.subheader(heading)
    st.write(reviews_frame)
# Compiled once at import time instead of on every call. Raw strings avoid
# the invalid-escape warnings that "\s", "\[", "\-" and "\/" trigger in
# ordinary string literals on recent Python versions.
_REPLACE_NO_SPACE = re.compile(r"[.;:!'?,\"()\[\]]")
_REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")


def clean_tweets(series):
    """Normalise an iterable of raw text values for the sentiment model.

    Each value is passed through the tweet preprocessor (``p.clean``),
    lower-cased, stripped of punctuation, has dashes, slashes and doubled
    ``<br />`` tags replaced by spaces, and finally has runs of whitespace
    collapsed to single spaces.

    Args:
        series: Iterable of values (for example a pandas Series). Missing
            values (``None`` / NaN) are mapped to an empty string.

    Returns:
        A list of cleaned strings, one per input value, in input order.
    """
    cleaned = []
    for line in series:
        # Missing values produce an empty string so the output stays aligned
        # with the input rows.
        if pd.isnull(line):
            cleaned.append("")
            continue
        # Non-string cells (numbers, dates) are converted to text first so
        # the preprocessor and regexes below do not fail on them.
        text = p.clean(str(line))
        # Remove punctuation
        text = _REPLACE_NO_SPACE.sub("", text.lower())
        # Replace specific characters with spaces
        text = _REPLACE_WITH_SPACE.sub(" ", text)
        # Remove extra spaces
        cleaned.append(" ".join(text.split()))
    return cleaned
# Streamlit app

# Maps the opaque column ids found in the scraped review CSVs to readable
# names. Columns not listed here keep their original label.
_REVIEW_COLUMN_NAMES = {
    'a3332d346a': 'Reviewer Name',
    'afac1f68d9': 'Reviewer Country',
    'abf093bdfe': 'Room Type',
    'abf093bdfe 2': 'Length of Stay',
    'abf093bdfe 3': 'Review Date',
    'abf093bdfe 4': 'Traveler Type',
    'abf093bdfe 5': 'Second Review Date',
    'f6431b446c': 'Overall Rating',
    'a53cbfa6de': 'Positive Comments',
    'a53cbfa6de 2': 'Negative Comments',
    'a3332d346a 2': 'Hotel Response',
    'a53cbfa6de 3': 'Hotel Response1',
}


def _render_collected_data():
    """Show the raw scraped hotel table followed by the preprocessed one."""
    df = pd.read_csv('chennai hotes.csv')
    df1 = pd.read_csv('stream.csv')
    st.subheader('Collected chennai hotes Data')
    st.write(df)
    st.subheader('preprocess applyed data')
    st.write(df1)


def _hotel_card_html(row):
    """Return the HTML card for one hotel row, with every value escaped."""
    # Function-scope import keeps this block self-contained.
    from html import escape

    def cell(column):
        # The values come from scraped CSV data and are rendered with
        # unsafe_allow_html=True, so they must be escaped before being
        # placed into markup or attributes.
        return escape(str(row[column]), quote=True)

    return f"""
        <div class="hotel-container">
            <img class="hotel-image" src="{cell('hotel image')}">
            <div class="hotel-details">
                <h2>{cell('Hotel Name')}</h2>
                <p><strong>Rating:</strong> {cell('rating')}</p>
                <p><strong>Location:</strong> {cell('location')} ({cell('nearest places')})</p>
                <p><strong>Website:</strong> <a href="{cell('hotel website')}">Website link</a></p>
                <p><strong>Number of Reviews:</strong> {cell('number of reviewss 2')}</p>
                <p><strong>Room Type:</strong> {cell('room type')}</p>
                <p><strong>Price:</strong> {cell('price')}</p>
                <p><strong>Strengths:</strong> {cell('Strengths')}</p>
                <p><strong>Weaknesses:</strong> {cell('Weaknesses')}</p>
            </div>
        </div>
    """


def _render_hotel_cards():
    """Render every hotel in ``stream.csv`` as a styled card."""
    df = pd.read_csv('stream.csv')
    css = """
        <style>
            .hotel-container {
                border: 1px solid #ddd;
                border-radius: 5px;
                padding: 10px;
                margin-bottom: 20px;
            }
            .hotel-image {
                max-width: 100%;
                border-radius: 5px;
                margin-bottom: 10px;
            }
            .hotel-details {
                font-size: 16px;
            }
        </style>
    """
    st.markdown(css, unsafe_allow_html=True)
    for _, row in df.iterrows():
        st.markdown(_hotel_card_html(row), unsafe_allow_html=True)


def _render_review_datasets():
    """Let the user pick one review CSV and display it."""
    selected_dataset = st.selectbox('Select Dataset', csv_files)
    # The selectbox yields None when there are no files to choose from.
    if selected_dataset:
        display_selected_dataset(selected_dataset)


def _render_sentiment_page():
    """Upload a CSV, stack two chosen columns, and classify their sentiment."""
    st.title('CSV Column Concatenation and Sentiment Analysis')
    uploaded_file = st.file_uploader('Upload CSV file', type=['csv'])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file).rename(columns=_REVIEW_COLUMN_NAMES)
    st.subheader('Original DataFrame:')
    st.write(df)

    selected_columns = st.multiselect('Select columns to concatenate', df.columns)
    if not st.button('Concatenate columns'):
        return
    if len(selected_columns) != 2:
        st.warning('Please select exactly two columns to concatenate.')
        return

    # Stack the columns, then drop nulls and duplicates and renumber rows.
    new_df = (
        concatenate_columns(df, selected_columns)
        .dropna()
        .drop_duplicates()
        .reset_index(drop=True)
    )
    if new_df.empty:
        # Nothing to classify; avoid handing the model zero rows.
        st.warning('The selected columns contain no data to analyze.')
        return

    new_df['CleanedData'] = clean_tweets(new_df['ConcatenatedData'])
    # NOTE(security): joblib.load unpickles the file and can execute
    # arbitrary code; only ship a model file from a trusted source.
    loaded_model = joblib.load('sentiment_analysis_model.pkl')
    new_df['Sentiment'] = loaded_model.predict(new_df['CleanedData'])

    st.subheader('Concatenated, Cleaned, and Sentiment Analyzed DataFrame:')
    st.write(new_df)
    st.markdown(
        create_download_link(new_df, 'concatenated_sentiment_analyzed_data'),
        unsafe_allow_html=True,
    )


def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Insertion order of this dict is the order shown in the sidebar.
    pages = {
        'Home': lambda: st.markdown(project_description),
        'collected hotel data': _render_collected_data,
        'Display Hotel Data': _render_hotel_cards,
        'Display hotel reviews Datasets': _render_review_datasets,
        'CSV Column Concatenation and Sentiment Analysis': _render_sentiment_page,
    }
    menu = st.sidebar.selectbox('Navigation', list(pages))
    pages[menu]()


# Run the app
if __name__ == '__main__':
    main()