# Risha15's picture
# Update app.py
# cbbf6b8 verified
import streamlit as st
import pandas as pd
import numpy as np
import requests
import io
import tempfile
import os
# Try to import plotly; it is optional and only used for the genre bar chart.
# A failure is recorded here instead of being reported immediately, because
# st.set_page_config() below has to run before any other Streamlit command
# and an early st.warning() would violate that ordering.
try:
    import plotly.express as px
    import plotly.graph_objects as go
    PLOTLY_AVAILABLE = True
    _plotly_import_error = None
except Exception as e:
    PLOTLY_AVAILABLE = False
    # `e` is unbound when the except clause ends, so keep our own reference.
    _plotly_import_error = e

# Page configuration
st.set_page_config(
    page_title="Steam Game Recommender",
    page_icon="๐ŸŽฎ",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Now that the page is configured it is safe to surface the import problem.
if _plotly_import_error is not None:
    st.warning(f"Plotly import warning: {_plotly_import_error}")
# Custom CSS injected once per page render: the centred page header, the
# recommendation cards with their three rating-tier colour variants, the game
# title colour and the gradient stat card used in the sidebar.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 3rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.recommendation-card {
padding: 1.5rem;
border-radius: 10px;
border: 1px solid #ddd;
margin: 1rem 0;
background-color: #f9f9f9;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.similarity-high {
background-color: #d4edda;
border-left: 5px solid #28a745;
}
.similarity-medium {
background-color: #fff3cd;
border-left: 5px solid #ffc107;
}
.similarity-low {
background-color: #f8d7da;
border-left: 5px solid #dc3545;
}
.game-title {
color: #1f77b4;
margin-bottom: 0.5rem;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 1rem;
border-radius: 10px;
text-align: center;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
@st.cache_data(ttl=3600)
def _download_dataset():
    """Download the Steam games dataset and return it as a single DataFrame.

    Every ``data/*.parquet`` file of the Hugging Face dataset repository is
    fetched into a temporary directory, read, and concatenated in file-name
    order.

    Raises:
        FileNotFoundError: if the snapshot contains no ``data`` directory or
            no parquet files.
        Exception: anything raised by the download or by parquet parsing.

    Failures are raised rather than returned as an empty frame so that
    ``st.cache_data`` only ever stores a successful load.
    """
    st.info("๐Ÿš€ Initializing dataset download...")
    # Imported lazily so a missing huggingface_hub surfaces as a load error
    # handled by load_data() instead of breaking the module import.
    from huggingface_hub import snapshot_download

    repo_id = "FronkonGames/steam-games-dataset"
    progress_bar = st.progress(0)
    status_text = st.empty()
    status_text.text("๐Ÿ“ฅ Downloading Steam games dataset from Hugging Face...")
    progress_bar.progress(20)

    with tempfile.TemporaryDirectory() as tmpdir:
        # Download the dataset files
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns="data/*.parquet",
            local_dir=tmpdir,
            local_dir_use_symlinks=False
        )
        status_text.text("๐Ÿ” Locating data files...")
        progress_bar.progress(50)

        data_dir = os.path.join(tmpdir, "data")
        if not os.path.isdir(data_dir):
            raise FileNotFoundError("Data directory not found")

        # os.listdir order is arbitrary; sort so shards are always combined
        # in the same order.
        parquet_files = sorted(
            name for name in os.listdir(data_dir) if name.endswith('.parquet')
        )
        if not parquet_files:
            raise FileNotFoundError("No parquet files found in the dataset")

        status_text.text("๐Ÿ“Š Loading dataset into memory...")
        progress_bar.progress(80)
        # Read inside the `with` block: the files vanish when it exits.
        frames = [
            pd.read_parquet(os.path.join(data_dir, name))
            for name in parquet_files
        ]

    # Load every shard, not just the first one listed.
    df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

    progress_bar.progress(100)
    status_text.text("โœ… Dataset loaded successfully!")
    st.success(f"๐ŸŽ‰ Successfully loaded {len(df):,} Steam games!")
    return df


def load_data():
    """Load the full Steam games dataset using huggingface_hub.

    Returns:
        The dataset as a DataFrame, or an empty DataFrame when loading fails
        (the error is shown to the user via ``st.error``).

    The caching lives on ``_download_dataset`` so that a failed attempt is
    not cached for the whole TTL; the next rerun retries the download.
    """
    try:
        return _download_dataset()
    except Exception as e:
        st.error(f"โŒ Error loading dataset: {str(e)}")
        return pd.DataFrame()
@st.cache_data
def preprocess_data(df):
    """Return a cleaned copy of ``df`` for the recommender and the UI.

    An empty frame is returned untouched. Otherwise, on a copy:
      * 'Price', 'Positive', 'Negative' are coerced to numbers (bad/missing -> 0);
      * 'Total_Reviews' and a 0-100 'Rating_Score' are derived when both vote
        columns exist, else 'Rating_Score' is set to a flat 50;
      * the text columns become strings with missing values as '';
      * 'Windows', 'Mac', 'Linux' become booleans with missing values as False.
    Columns that are absent from the input are simply skipped.
    """
    if df.empty:
        return df

    st.info("๐Ÿ”„ Preprocessing data...")

    # Work on a copy so the cached input frame is never mutated.
    cleaned = df.copy()

    # Numeric columns: unparseable or missing values count as zero.
    for name in ('Price', 'Positive', 'Negative'):
        if name in cleaned.columns:
            as_number = pd.to_numeric(cleaned[name], errors='coerce')
            cleaned[name] = as_number.fillna(0)

    # Rating: share of positive votes, 0 for games nobody has reviewed.
    has_votes = 'Positive' in cleaned.columns and 'Negative' in cleaned.columns
    if not has_votes:
        cleaned['Rating_Score'] = 50  # Default rating
    else:
        cleaned['Total_Reviews'] = cleaned['Positive'] + cleaned['Negative']
        total = cleaned['Total_Reviews']
        cleaned['Rating_Score'] = np.where(
            total > 0,
            cleaned['Positive'] / total * 100,
            0
        )

    # Text columns: always plain strings.
    for name in ('Genres', 'Tags', 'Categories', 'About the game', 'Name'):
        if name in cleaned.columns:
            without_gaps = cleaned[name].fillna('')
            cleaned[name] = without_gaps.astype(str)

    # Platform flags: always booleans.
    for name in ('Windows', 'Mac', 'Linux'):
        if name in cleaned.columns:
            without_gaps = cleaned[name].fillna(False)
            cleaned[name] = without_gaps.astype(bool)

    st.success("โœ… Data preprocessing complete!")
    return cleaned
class SteamGameRecommender:
    """Filter-and-rank recommender over a games DataFrame.

    The frame is expected to carry a 'Rating_Score' column (both public
    methods sort by it). 'Genres', 'Tags', 'Price', 'Total_Reviews' and the
    platform columns are used when present and skipped when absent.
    """

    # Columns (when available) returned by recommend_by_features.
    _FEATURE_COLUMNS = (
        'Name', 'Genres', 'Price', 'Rating_Score', 'Positive', 'Negative',
        'Release date', 'About the game', 'Total_Reviews',
    )
    # Columns (when available) returned by get_popular_games.
    _POPULAR_COLUMNS = (
        'Name', 'Genres', 'Price', 'Rating_Score', 'Positive', 'Negative',
        'Release date', 'About the game',
    )
    # Platform names that map one-to-one onto boolean columns.
    _PLATFORMS = ('Windows', 'Mac', 'Linux')

    def __init__(self, games_df):
        """Store ``games_df``; it is read but never modified by this class."""
        self.df = games_df

    @staticmethod
    def _filter_contains(frame, column, needle):
        """Keep rows whose ``column`` contains ``needle`` (literal, case-insensitive)."""
        if not needle or column not in frame.columns:
            return frame
        # regex=False: genre/tag names are plain text. Interpreted as a
        # pattern, "C++" would raise and "2.5D" would match "2x5D".
        matches = frame[column].str.contains(
            needle, case=False, na=False, regex=False
        )
        return frame[matches]

    @staticmethod
    def _top_rated(frame, top_n):
        """Return the ``top_n`` rows by rating, ties broken by review count when known."""
        if 'Total_Reviews' in frame.columns:
            ranked = frame.sort_values(
                ['Rating_Score', 'Total_Reviews'],
                ascending=[False, False]
            )
        else:
            ranked = frame.sort_values('Rating_Score', ascending=False)
        return ranked.head(top_n)

    @staticmethod
    def _select_columns(frame, wanted):
        """Return ``frame`` restricted to the ``wanted`` columns it actually has."""
        return frame[[col for col in wanted if col in frame.columns]]

    def recommend_by_features(self, genres='', tags='', price_max=60,
                              platforms=None, min_rating=0, min_reviews=0, top_n=10):
        """Type 1: Feature-based recommendations.

        Args:
            genres: text that must appear in 'Genres' ('' disables the filter).
            tags: text that must appear in 'Tags' ('' disables the filter).
            price_max: highest allowed 'Price'; None disables the filter.
            platforms: iterable of 'Windows' / 'Mac' / 'Linux'; a game passes
                when it supports at least one of them. Empty/None disables it.
            min_rating: lowest allowed 'Rating_Score'; 0 disables the filter.
            min_reviews: lowest allowed 'Total_Reviews'; 0 disables the filter.
            top_n: maximum number of rows returned.

        Returns:
            The best-rated matching games, or an empty DataFrame (no columns)
            when the data set is empty or nothing matches.
        """
        if self.df.empty:
            return pd.DataFrame()

        # Every filter below yields a new frame, so self.df is never modified.
        filtered_games = self.df
        filtered_games = self._filter_contains(filtered_games, 'Genres', genres)
        filtered_games = self._filter_contains(filtered_games, 'Tags', tags)

        # Apply price filter
        if price_max is not None and 'Price' in filtered_games.columns:
            filtered_games = filtered_games[filtered_games['Price'] <= price_max]

        # Apply platform filters: OR across the requested platform columns.
        if platforms:
            requested = [
                name for name in self._PLATFORMS
                if name in platforms and name in filtered_games.columns
            ]
            if requested:
                supported = filtered_games[requested].eq(True).any(axis=1)
                filtered_games = filtered_games[supported]

        # Apply rating filter
        if min_rating > 0:
            filtered_games = filtered_games[filtered_games['Rating_Score'] >= min_rating]

        # Apply minimum reviews filter
        if min_reviews > 0 and 'Total_Reviews' in filtered_games.columns:
            filtered_games = filtered_games[filtered_games['Total_Reviews'] >= min_reviews]

        if len(filtered_games) == 0:
            return pd.DataFrame()

        recommendations = self._top_rated(filtered_games, top_n)
        return self._select_columns(recommendations, self._FEATURE_COLUMNS)

    def get_popular_games(self, top_n=10):
        """Get popular games based on reviews and ratings.

        Only games with more than 100 total reviews are considered when
        'Total_Reviews' is available; otherwise every game is a candidate.

        Returns:
            Up to ``top_n`` best-rated games, or an empty DataFrame when the
            data set is empty or no game qualifies.
        """
        if self.df.empty:
            return pd.DataFrame()

        # Filter games with substantial reviews
        if 'Total_Reviews' in self.df.columns:
            popular_games = self.df[self.df['Total_Reviews'] > 100]
        else:
            popular_games = self.df

        if len(popular_games) == 0:
            return pd.DataFrame()

        popular_games = self._top_rated(popular_games, top_n)
        return self._select_columns(popular_games, self._POPULAR_COLUMNS)
def display_game_card(game, index):
    """Display a game card with consistent formatting.

    Args:
        game: a mapping-like row (anything with ``.get``) holding the game's
            fields; missing fields fall back to neutral defaults.
        index: zero-based position in the result list; shown as ``index + 1``.

    The card shows title, genres and a shortened description on the left,
    price and rating in the middle, release date and review counts on the
    right, and is colour-coded by rating (>= 80, >= 60, lower).
    """
    # Function-scope import: only this function needs it.
    import html

    rating = game.get('Rating_Score', 0)
    # Determine card color based on rating
    if rating >= 80:
        card_class = "similarity-high"
        rating_emoji = "๐Ÿ”ฅ"
    elif rating >= 60:
        card_class = "similarity-medium"
        rating_emoji = "โญ"
    else:
        card_class = "similarity-low"
        rating_emoji = "โš ๏ธ"

    # The title is interpolated into raw HTML below, so it must be escaped;
    # names containing '<' or '&' would otherwise break or inject markup.
    safe_name = html.escape(str(game.get("Name", "Unknown")))

    with st.container():
        # NOTE(review): the opening and closing <div> are separate
        # st.markdown calls - confirm Streamlit really nests the widgets
        # in between inside this wrapper.
        st.markdown(f'<div class="recommendation-card {card_class}">', unsafe_allow_html=True)
        col1, col2, col3 = st.columns([3, 1, 1])
        with col1:
            st.markdown(f'<h3 class="game-title">{index + 1}. {safe_name}</h3>', unsafe_allow_html=True)
            st.write(f"**Genres:** {game.get('Genres', 'N/A')}")
            description = game.get('About the game', '')
            # Very short descriptions are skipped; non-text values (e.g. a
            # missing-value marker) are ignored instead of crashing len().
            if isinstance(description, str) and len(description) > 50:
                # Only mark the text as cut off when it actually was.
                ellipsis = "..." if len(description) > 250 else ""
                st.write(f"**Description:** {description[:250]}{ellipsis}")
        with col2:
            price = game.get('Price', 0)
            st.metric("๐Ÿ’ฐ Price", f"${price:.2f}" if price > 0 else "Free")
            st.metric(f"{rating_emoji} Rating", f"{rating:.1f}%")
        with col3:
            release_date = game.get('Release date', 'N/A')
            st.write(f"**Release Date:** {release_date}")
            positive = game.get('Positive', 0)
            negative = game.get('Negative', 0)
            total_reviews = positive + negative
            # Guard against division by zero for unreviewed games.
            if total_reviews > 0:
                st.write(f"**Reviews:** ๐Ÿ‘ {int(positive):,} | ๐Ÿ‘Ž {int(negative):,}")
                st.write(f"**Approval:** {(positive/total_reviews*100):.1f}%")
        st.markdown('</div>', unsafe_allow_html=True)
def display_text_chart(data, title):
    """Render ``data`` as a plain-text list under ``title``.

    Fallback for when no plotly chart can be drawn. ``data`` must offer
    ``.items()`` yielding (label, numeric count) pairs; each pair becomes one
    line of the form "**label:** count games".
    """
    st.subheader(title)
    rows = (f"**{label}:** {total:,} games" for label, total in data.items())
    for row in rows:
        st.write(row)
def _split_terms(value):
    """Yield the trimmed comma-separated entries of ``value`` longer than one character."""
    if not isinstance(value, str):
        return
    for piece in value.split(','):
        term = piece.strip()
        if len(term) > 1:
            yield term


def _unique_terms(series, sample_size=5000):
    """Return the sorted distinct terms found in the first ``sample_size`` rows of ``series``."""
    return sorted({
        term
        for value in series.head(sample_size)
        for term in _split_terms(value)
    })


def main():
    """Render the Streamlit page.

    Loads and preprocesses the dataset, builds the sidebar filters, runs the
    recommender when one of the two buttons is pressed (keeping the last
    result in session state across reruns) and shows either the result cards
    with summary metrics or, before any search, dataset-wide statistics and a
    genre frequency chart.
    """
    # Header
    st.markdown('<h1 class="main-header">๐ŸŽฎ Steam Game Recommendation System</h1>',
                unsafe_allow_html=True)
    st.markdown("### Type 1: Feature-Based Recommendations | Full Dataset (80,000+ Games)")

    # Initialize session state
    if 'last_recommendations' not in st.session_state:
        st.session_state.last_recommendations = None

    # Load data
    df = load_data()
    if df.empty:
        st.error(
            "\n"
            "โŒ Unable to load the dataset. This might be due to:\n"
            "- Network connectivity issues\n"
            "- Hugging Face API limitations\n"
            "- Dataset availability\n"
            "Please try refreshing the page or check back later.\n"
        )
        return

    # Preprocess data
    df = preprocess_data(df)
    if df.empty:
        return
    recommender = SteamGameRecommender(df)

    # Sidebar for filters
    st.sidebar.title("๐Ÿ” Filter Games")
    st.sidebar.markdown(f"<div class='stat-card'><h3>๐Ÿ“Š Loaded</h3><h2>{len(df):,}</h2><p>Games</p></div>",
                        unsafe_allow_html=True)

    # Genre selection (options are sampled from the first 5000 rows)
    st.sidebar.subheader("๐ŸŽฏ Genres & Tags")
    if 'Genres' in df.columns:
        all_genres = _unique_terms(df['Genres'])
        selected_genre = st.sidebar.selectbox("Select Genre", [""] + all_genres)
    else:
        selected_genre = ""

    # Tag selection (same sampling, capped at the first 150 tags alphabetically)
    if 'Tags' in df.columns:
        all_tags = _unique_terms(df['Tags'])[:150]
        selected_tag = st.sidebar.selectbox("Select Tag (Optional)", [""] + all_tags)
    else:
        selected_tag = ""

    # Price and rating filters
    st.sidebar.subheader("๐Ÿ’ฐ Price & Rating")
    max_price = st.sidebar.slider("Maximum Price ($)", 0, 100, 60)
    min_rating = st.sidebar.slider("Minimum Rating (%)", 0, 100, 70)
    min_reviews = st.sidebar.slider("Minimum Reviews", 0, 1000, 10)

    # Platforms
    st.sidebar.subheader("๐Ÿ–ฅ๏ธ Platforms")
    windows = st.sidebar.checkbox("Windows", value=True)
    mac = st.sidebar.checkbox("Mac")
    linux = st.sidebar.checkbox("Linux")
    platforms = []
    if windows:
        platforms.append('Windows')
    if mac:
        platforms.append('Mac')
    if linux:
        platforms.append('Linux')

    # Number of recommendations
    st.sidebar.subheader("๐Ÿ“‹ Results")
    num_recommendations = st.sidebar.slider("Number of Recommendations", 5, 50, 15)

    # Recommendation buttons; each stores its result in session state so it
    # survives the reruns triggered by other widgets.
    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("๐ŸŽฏ Get Recommendations", type="primary", use_container_width=True):
            with st.spinner(f'๐Ÿ” Searching through {len(df):,} games...'):
                recommendations = recommender.recommend_by_features(
                    genres=selected_genre,
                    tags=selected_tag,
                    price_max=max_price,
                    platforms=platforms,
                    min_rating=min_rating,
                    min_reviews=min_reviews,
                    top_n=num_recommendations
                )
                st.session_state.last_recommendations = recommendations
    with col2:
        if st.button("๐Ÿ”ฅ Popular Games", use_container_width=True):
            with st.spinner('Finding popular games...'):
                recommendations = recommender.get_popular_games(top_n=num_recommendations)
                st.session_state.last_recommendations = recommendations

    # Display recommendations
    if st.session_state.last_recommendations is not None:
        recommendations = st.session_state.last_recommendations
        if len(recommendations) > 0:
            st.success(f"๐ŸŽ‰ Found {len(recommendations)} games matching your criteria!")
            for idx, (_, game) in enumerate(recommendations.iterrows()):
                display_game_card(game, idx)

            # Statistics
            st.subheader("๐Ÿ“Š Recommendation Statistics")
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                avg_price = recommendations['Price'].mean()
                st.metric("Average Price", f"${avg_price:.2f}")
            with col2:
                avg_rating = recommendations['Rating_Score'].mean()
                st.metric("Average Rating", f"{avg_rating:.1f}%")
            with col3:
                total_positive = recommendations.get('Positive', pd.Series([0])).sum()
                st.metric("Total ๐Ÿ‘ Reviews", f"{int(total_positive):,}")
            with col4:
                st.metric("Games Found", len(recommendations))
        else:
            st.warning("No games found matching your criteria. Try adjusting your filters.")
    # Main area when no search is performed
    else:
        st.info(
            "\n"
            "๐Ÿ‘‹ **Welcome to the Steam Game Recommender!**\n"
            "Use the sidebar filters to find your perfect games from our database of **80,000+ games**.\n"
        )

        # Dataset statistics (df is known to be non-empty at this point)
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            total_games = len(df)
            st.metric("Total Games", f"{total_games:,}")
        with col2:
            avg_price = df['Price'].mean()
            st.metric("Average Price", f"${avg_price:.2f}")
        with col3:
            # Unrated games (score 0) would drag the average down.
            rated_games = df[df['Rating_Score'] > 0]
            avg_rating = rated_games['Rating_Score'].mean() if len(rated_games) > 0 else 0
            st.metric("Average Rating", f"{avg_rating:.1f}%")
        with col4:
            free_games = len(df[df['Price'] == 0])
            st.metric("Free Games", f"{free_games:,}")

        # Popular genres display
        st.subheader("๐ŸŽฏ Most Popular Genres")
        if 'Genres' in df.columns:
            # Count trimmed genre names so " Indie" and "Indie" are one
            # genre, matching how the sidebar list is built.
            genre_counts = pd.Series([
                term
                for genres in df['Genres'].head(10000)
                for term in _split_terms(genres)
            ]).value_counts().head(15)
            if len(genre_counts) > 0:
                if PLOTLY_AVAILABLE:
                    try:
                        fig_genres = px.bar(
                            x=genre_counts.values,
                            y=genre_counts.index,
                            orientation='h',
                            title='Top 15 Game Genres in Database',
                            labels={'x': 'Number of Games', 'y': 'Genre'},
                            color=genre_counts.values,
                            color_continuous_scale='viridis'
                        )
                        fig_genres.update_layout(showlegend=False)
                        st.plotly_chart(fig_genres, use_container_width=True)
                    except Exception as e:
                        # Best effort: fall back to the text list rather
                        # than failing the whole page over a chart.
                        st.warning(f"Plotly chart error: {e}")
                        display_text_chart(genre_counts.head(10), "Top 10 Genres")
                else:
                    display_text_chart(genre_counts.head(10), "Top 10 Genres")
# Entry point: build the page when this file is executed as a script, but not
# when it is merely imported.
if __name__ == "__main__":
    main()