Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import requests | |
| import io | |
| import tempfile | |
| import os | |
# Try to import plotly with error handling.
# A failure is only recorded here and reported after st.set_page_config():
# Streamlit documents set_page_config as having to be the first Streamlit
# command of the script, so st.warning must not run before it.
try:
    import plotly.express as px
    import plotly.graph_objects as go
    PLOTLY_AVAILABLE = True
    _plotly_import_error = None
except Exception as e:  # deliberately broad: any import-time failure falls back to text charts
    PLOTLY_AVAILABLE = False
    # `e` is unbound once the except block ends, so keep our own reference.
    _plotly_import_error = e

# Page configuration
st.set_page_config(
    page_title="Steam Game Recommender",
    page_icon="๐ฎ",
    layout="wide",
    initial_sidebar_state="expanded"
)

if _plotly_import_error is not None:
    st.warning(f"Plotly import warning: {_plotly_import_error}")
# Custom CSS: app-wide stylesheet for the header, the recommendation cards
# (coloured by rating band) and the sidebar statistic card.
_CUSTOM_CSS = """
<style>
.main-header {
    font-size: 3rem;
    color: #1f77b4;
    text-align: center;
    margin-bottom: 2rem;
}
.recommendation-card {
    padding: 1.5rem;
    border-radius: 10px;
    border: 1px solid #ddd;
    margin: 1rem 0;
    background-color: #f9f9f9;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.similarity-high {
    background-color: #d4edda;
    border-left: 5px solid #28a745;
}
.similarity-medium {
    background-color: #fff3cd;
    border-left: 5px solid #ffc107;
}
.similarity-low {
    background-color: #f8d7da;
    border-left: 5px solid #dc3545;
}
.game-title {
    color: #1f77b4;
    margin-bottom: 0.5rem;
}
.stat-card {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 1rem;
    border-radius: 10px;
    text-align: center;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
class _DatasetLayoutError(Exception):
    """The downloaded snapshot does not contain the expected parquet files.

    The exception message is the exact text shown to the user.
    """


@st.cache_data(show_spinner=False)
def _fetch_dataset(repo_id):
    """Download every ``data/*.parquet`` shard of *repo_id* into one DataFrame.

    The result is cached by Streamlit, so script reruns triggered by widget
    interaction do not download and parse the dataset again. Failures are
    raised rather than returned so that an error is never cached. This helper
    makes no Streamlit element calls; all progress reporting stays in the
    caller.

    Raises:
        _DatasetLayoutError: the snapshot has no ``data`` directory or no
            parquet file inside it.
    """
    from huggingface_hub import snapshot_download

    with tempfile.TemporaryDirectory() as tmpdir:
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns="data/*.parquet",
            local_dir=tmpdir,
            local_dir_use_symlinks=False
        )

        data_dir = os.path.join(tmpdir, "data")
        if not os.path.exists(data_dir):
            raise _DatasetLayoutError("โ Data directory not found")

        # os.listdir order is arbitrary; sort so shards are read deterministically.
        parquet_files = sorted(
            f for f in os.listdir(data_dir) if f.endswith('.parquet')
        )
        if not parquet_files:
            raise _DatasetLayoutError("โ No parquet files found in the dataset")

        # Read all shards, not just the first one, while the temp dir still exists.
        frames = [
            pd.read_parquet(os.path.join(data_dir, f)) for f in parquet_files
        ]

    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, ignore_index=True)


def load_data():
    """Load the full Steam games dataset using huggingface_hub.

    Shows progress and status messages in the Streamlit page while the
    (cached) download runs.

    Returns:
        The games DataFrame, or an empty DataFrame when loading failed; in
        that case an error message has already been shown with ``st.error``.
    """
    try:
        st.info("๐ Initializing dataset download...")
        repo_id = "FronkonGames/steam-games-dataset"

        progress_bar = st.progress(0)
        status_text = st.empty()
        status_text.text("๐ฅ Downloading Steam games dataset from Hugging Face...")
        progress_bar.progress(20)

        df = _fetch_dataset(repo_id)

        progress_bar.progress(100)
        status_text.text("โ Dataset loaded successfully!")
        st.success(f"๐ Successfully loaded {len(df):,} Steam games!")
        return df
    except _DatasetLayoutError as e:
        st.error(str(e))
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary: any download/parse failure becomes a page message.
        st.error(f"โ Error loading dataset: {str(e)}")
        return pd.DataFrame()
def preprocess_data(df):
    """Return a cleaned copy of *df* ready for filtering and ranking.

    An empty frame is returned unchanged. Otherwise the copy gets:
    numeric 'Price'/'Positive'/'Negative' columns (unparseable values become
    0), a 'Rating_Score' percentage (plus 'Total_Reviews' when both review
    columns exist, else a flat 50), string-typed text columns with missing
    values as '', and boolean platform columns. Columns that are absent are
    skipped.
    """
    if df.empty:
        return df

    st.info("๐ Preprocessing data...")

    # Work on a copy so the caller's frame is left untouched.
    cleaned = df.copy()

    # Numeric columns: coerce to numbers, missing/invalid -> 0.
    for name in ('Price', 'Positive', 'Negative'):
        if name in cleaned.columns:
            as_numbers = pd.to_numeric(cleaned[name], errors='coerce')
            cleaned[name] = as_numbers.fillna(0)

    # Rating score: share of positive reviews, 0 when a game has no reviews.
    has_review_counts = (
        'Positive' in cleaned.columns and 'Negative' in cleaned.columns
    )
    if not has_review_counts:
        cleaned['Rating_Score'] = 50  # Default rating
    else:
        cleaned['Total_Reviews'] = cleaned['Positive'] + cleaned['Negative']
        reviewed = cleaned['Total_Reviews'] > 0
        approval = cleaned['Positive'] / cleaned['Total_Reviews'] * 100
        cleaned['Rating_Score'] = np.where(reviewed, approval, 0)

    # Text columns: missing -> '', everything as str.
    for name in ('Genres', 'Tags', 'Categories', 'About the game', 'Name'):
        if name in cleaned.columns:
            cleaned[name] = cleaned[name].fillna('').astype(str)

    # Platform flags: missing -> False, everything as bool.
    for name in ('Windows', 'Mac', 'Linux'):
        if name in cleaned.columns:
            cleaned[name] = cleaned[name].fillna(False).astype(bool)

    st.success("โ Data preprocessing complete!")
    return cleaned
class SteamGameRecommender:
    """Filter-and-rank recommender over a Steam games DataFrame.

    The frame must contain a 'Rating_Score' column (used for ranking). The
    columns 'Genres', 'Tags', 'Price', 'Windows'/'Mac'/'Linux' and
    'Total_Reviews' are used when present; a filter whose column is missing
    is skipped. The frame passed in is never modified.
    """

    # Columns returned by each public method, in display order.
    # Columns missing from the data are silently left out.
    _FEATURE_COLUMNS = (
        'Name', 'Genres', 'Price', 'Rating_Score', 'Positive', 'Negative',
        'Release date', 'About the game', 'Total_Reviews',
    )
    _POPULAR_COLUMNS = (
        'Name', 'Genres', 'Price', 'Rating_Score', 'Positive', 'Negative',
        'Release date', 'About the game',
    )
    _PLATFORM_COLUMNS = ('Windows', 'Mac', 'Linux')

    def __init__(self, games_df):
        self.df = games_df

    @staticmethod
    def _top_rated(games, top_n, columns):
        """Return the *top_n* rows of *games* ranked by rating, then review count."""
        sort_keys = ['Rating_Score']
        if 'Total_Reviews' in games.columns:
            sort_keys.append('Total_Reviews')
        ranked = games.sort_values(
            sort_keys, ascending=[False] * len(sort_keys)
        ).head(top_n)
        return ranked[[col for col in columns if col in ranked.columns]]

    def recommend_by_features(self, genres='', tags='', price_max=60,
                              platforms=None, min_rating=0, min_reviews=0, top_n=10):
        """Type 1: Feature-based recommendations.

        Args:
            genres: Text that must occur in 'Genres' (case-insensitive,
                matched literally); '' disables the filter.
            tags: Same as *genres*, for the 'Tags' column.
            price_max: Highest accepted 'Price'; None disables the filter.
            platforms: Iterable of 'Windows'/'Mac'/'Linux'; a game matches if
                it supports at least one of them. Empty/None disables it.
            min_rating: Lowest accepted 'Rating_Score'; 0 disables the filter.
            min_reviews: Lowest accepted 'Total_Reviews'; 0 disables it.
            top_n: Maximum number of rows returned.

        Returns:
            The best-rated matching games (display columns only), or an empty
            DataFrame when nothing matches.
        """
        games = self.df
        if games.empty:
            return pd.DataFrame()

        # Build one boolean mask instead of copying the frame per filter.
        keep = pd.Series(True, index=games.index)

        for column, needle in (('Genres', genres), ('Tags', tags)):
            if needle and column in games.columns:
                # regex=False: the needle is a label picked from the data
                # (e.g. "Football (Soccer)"), not a pattern.
                keep = keep & games[column].str.contains(
                    needle, case=False, na=False, regex=False
                )

        if price_max is not None and 'Price' in games.columns:
            keep = keep & (games['Price'] <= price_max)

        if platforms:
            wanted = [
                col for col in self._PLATFORM_COLUMNS
                if col in platforms and col in games.columns
            ]
            if wanted:
                keep = keep & games[wanted].eq(True).any(axis=1)

        if min_rating > 0:
            keep = keep & (games['Rating_Score'] >= min_rating)

        if min_reviews > 0 and 'Total_Reviews' in games.columns:
            keep = keep & (games['Total_Reviews'] >= min_reviews)

        matches = games[keep]
        if matches.empty:
            return pd.DataFrame()
        return self._top_rated(matches, top_n, self._FEATURE_COLUMNS)

    def get_popular_games(self, top_n=10, min_reviews=100):
        """Get popular games based on reviews and ratings.

        Args:
            top_n: Maximum number of rows returned.
            min_reviews: Only games with more than this many total reviews
                are considered (ignored when 'Total_Reviews' is missing).

        Returns:
            The best-rated qualifying games (display columns only), or an
            empty DataFrame when none qualify.
        """
        games = self.df
        if games.empty:
            return pd.DataFrame()

        if 'Total_Reviews' in games.columns:
            games = games[games['Total_Reviews'] > min_reviews]
            if games.empty:
                return pd.DataFrame()

        return self._top_rated(games, top_n, self._POPULAR_COLUMNS)
def display_game_card(game, index):
    """Display a game card with consistent formatting.

    Args:
        game: Mapping-like row (anything with ``.get``) that may provide
            'Name', 'Genres', 'About the game', 'Price', 'Rating_Score',
            'Release date', 'Positive' and 'Negative'.
        index: Zero-based position of the game; shown one-based in the title.
    """
    # Function-scope import keeps this block self-contained.
    import html

    description_limit = 250

    rating = game.get('Rating_Score', 0)

    # Determine card color based on rating
    if rating >= 80:
        card_class, rating_emoji = "similarity-high", "๐ฅ"
    elif rating >= 60:
        card_class, rating_emoji = "similarity-medium", "โญ"
    else:
        card_class, rating_emoji = "similarity-low", "โ ๏ธ"

    with st.container():
        st.markdown(f'<div class="recommendation-card {card_class}">', unsafe_allow_html=True)

        col1, col2, col3 = st.columns([3, 1, 1])

        with col1:
            # The name comes from an external dataset and is rendered as raw
            # HTML, so it must be escaped.
            safe_name = html.escape(str(game.get("Name", "Unknown")))
            st.markdown(f'<h3 class="game-title">{index + 1}. {safe_name}</h3>', unsafe_allow_html=True)
            st.write(f"**Genres:** {game.get('Genres', 'N/A')}")

            description = game.get('About the game', '')
            # Very short descriptions are not worth showing; non-strings
            # (e.g. NaN from unprocessed data) are skipped instead of crashing.
            if isinstance(description, str) and len(description) > 50:
                if len(description) > description_limit:
                    # Only add the ellipsis when text was actually cut off.
                    description = f"{description[:description_limit]}..."
                st.write(f"**Description:** {description}")

        with col2:
            price = game.get('Price', 0)
            st.metric("๐ฐ Price", f"${price:.2f}" if price > 0 else "Free")
            st.metric(f"{rating_emoji} Rating", f"{rating:.1f}%")

        with col3:
            release_date = game.get('Release date', 'N/A')
            st.write(f"**Release Date:** {release_date}")

            positive = game.get('Positive', 0)
            negative = game.get('Negative', 0)
            total_reviews = positive + negative
            if total_reviews > 0:
                st.write(f"**Reviews:** ๐ {int(positive):,} | ๐ {int(negative):,}")
                st.write(f"**Approval:** {(positive/total_reviews*100):.1f}%")

        st.markdown('</div>', unsafe_allow_html=True)
def display_text_chart(data, title):
    """Render *data* as a plain text list under *title* (fallback when plotly is unavailable).

    *data* must provide ``.items()`` yielding (label, count) pairs; each pair
    becomes one "label: count games" line.
    """
    st.subheader(title)
    lines = (f"**{label}:** {total:,} games" for label, total in data.items())
    for line in lines:
        st.write(line)
def _count_tokens(values, sample_size):
    """Count comma-separated tokens in the first *sample_size* entries of *values*.

    Tokens are stripped of surrounding whitespace; tokens shorter than two
    characters and non-string entries are ignored. Returns a Counter.
    """
    from collections import Counter

    counts = Counter()
    for entry in values.head(sample_size):
        if not isinstance(entry, str):
            continue
        for token in entry.split(','):
            token = token.strip()
            if len(token) > 1:
                counts[token] += 1
    return counts


def _render_recommendations(recommendations):
    """Show one card per recommended game followed by summary statistics."""
    if len(recommendations) == 0:
        st.warning("No games found matching your criteria. Try adjusting your filters.")
        return

    st.success(f"๐ Found {len(recommendations)} games matching your criteria!")

    for idx, (_, game) in enumerate(recommendations.iterrows()):
        display_game_card(game, idx)

    # Statistics
    st.subheader("๐ Recommendation Statistics")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        avg_price = recommendations['Price'].mean()
        st.metric("Average Price", f"${avg_price:.2f}")
    with col2:
        avg_rating = recommendations['Rating_Score'].mean()
        st.metric("Average Rating", f"{avg_rating:.1f}%")
    with col3:
        total_positive = recommendations.get('Positive', pd.Series([0])).sum()
        st.metric("Total ๐ Reviews", f"{int(total_positive):,}")
    with col4:
        st.metric("Games Found", len(recommendations))


def _render_overview(df):
    """Show the welcome message, dataset-wide metrics and the genre chart."""
    st.info("""
    ๐ **Welcome to the Steam Game Recommender!**
    Use the sidebar filters to find your perfect games from our database of **80,000+ games**.
    """)

    # Dataset statistics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        total_games = len(df)
        st.metric("Total Games", f"{total_games:,}")
    with col2:
        avg_price = df['Price'].mean()
        st.metric("Average Price", f"${avg_price:.2f}")
    with col3:
        # Unrated games carry a score of 0 and would drag the average down.
        rated_games = df[df['Rating_Score'] > 0]
        avg_rating = rated_games['Rating_Score'].mean() if len(rated_games) > 0 else 0
        st.metric("Average Rating", f"{avg_rating:.1f}%")
    with col4:
        free_games = len(df[df['Price'] == 0])
        st.metric("Free Games", f"{free_games:,}")

    # Popular genres display
    st.subheader("๐ฏ Most Popular Genres")
    if 'Genres' not in df.columns:
        return

    # Tokens are stripped before counting so "Action" and " Action" are
    # counted as the same genre.
    top_genres = _count_tokens(df['Genres'], 10000).most_common(15)
    if not top_genres:
        return
    genre_counts = pd.Series(dict(top_genres))

    if not PLOTLY_AVAILABLE:
        display_text_chart(genre_counts.head(10), "Top 10 Genres")
        return

    try:
        fig_genres = px.bar(
            x=genre_counts.values,
            y=genre_counts.index,
            orientation='h',
            title='Top 15 Game Genres in Database',
            labels={'x': 'Number of Games', 'y': 'Genre'},
            color=genre_counts.values,
            color_continuous_scale='viridis'
        )
        fig_genres.update_layout(showlegend=False)
        st.plotly_chart(fig_genres, use_container_width=True)
    except Exception as e:
        # Charting is optional: fall back to the text list on any plotly error.
        st.warning(f"Plotly chart error: {e}")
        display_text_chart(genre_counts.head(10), "Top 10 Genres")


def main():
    """Run the Streamlit page: load data, build the sidebar filters, show results.

    The last set of recommendations is kept in ``st.session_state`` so it
    survives the script reruns Streamlit performs on every widget interaction.
    """
    # Header
    st.markdown('<h1 class="main-header">๐ฎ Steam Game Recommendation System</h1>',
                unsafe_allow_html=True)
    st.markdown("### Type 1: Feature-Based Recommendations | Full Dataset (80,000+ Games)")

    # Initialize session state
    if 'last_recommendations' not in st.session_state:
        st.session_state.last_recommendations = None

    # Load data
    df = load_data()
    if df.empty:
        st.error("""
        โ Unable to load the dataset. This might be due to:
        - Network connectivity issues
        - Hugging Face API limitations
        - Dataset availability
        Please try refreshing the page or check back later.
        """)
        return

    # Preprocess data
    df = preprocess_data(df)
    if df.empty:
        return
    recommender = SteamGameRecommender(df)

    # Sidebar for filters
    st.sidebar.title("๐ Filter Games")
    st.sidebar.markdown(f"<div class='stat-card'><h3>๐ Loaded</h3><h2>{len(df):,}</h2><p>Games</p></div>",
                        unsafe_allow_html=True)

    # Genre selection
    st.sidebar.subheader("๐ฏ Genres & Tags")
    if 'Genres' in df.columns:
        all_genres = sorted(_count_tokens(df['Genres'], 5000))
        selected_genre = st.sidebar.selectbox("Select Genre", [""] + all_genres)
    else:
        selected_genre = ""

    # Tag selection: offer the 150 most frequent tags (alphabetically sorted
    # for display) rather than the first 150 of the alphabet, which would make
    # most tags unselectable.
    if 'Tags' in df.columns:
        frequent_tags = _count_tokens(df['Tags'], 5000).most_common(150)
        all_tags = sorted(tag for tag, _ in frequent_tags)
        selected_tag = st.sidebar.selectbox("Select Tag (Optional)", [""] + all_tags)
    else:
        selected_tag = ""

    # Price and rating filters
    st.sidebar.subheader("๐ฐ Price & Rating")
    max_price = st.sidebar.slider("Maximum Price ($)", 0, 100, 60)
    min_rating = st.sidebar.slider("Minimum Rating (%)", 0, 100, 70)
    min_reviews = st.sidebar.slider("Minimum Reviews", 0, 1000, 10)

    # Platforms
    st.sidebar.subheader("๐ฅ๏ธ Platforms")
    windows = st.sidebar.checkbox("Windows", value=True)
    mac = st.sidebar.checkbox("Mac")
    linux = st.sidebar.checkbox("Linux")

    platforms = []
    if windows:
        platforms.append('Windows')
    if mac:
        platforms.append('Mac')
    if linux:
        platforms.append('Linux')

    # Number of recommendations
    st.sidebar.subheader("๐ Results")
    num_recommendations = st.sidebar.slider("Number of Recommendations", 5, 50, 15)

    # Recommendation buttons
    col1, col2 = st.sidebar.columns(2)

    with col1:
        if st.button("๐ฏ Get Recommendations", type="primary", use_container_width=True):
            with st.spinner(f'๐ Searching through {len(df):,} games...'):
                recommendations = recommender.recommend_by_features(
                    genres=selected_genre,
                    tags=selected_tag,
                    price_max=max_price,
                    platforms=platforms,
                    min_rating=min_rating,
                    min_reviews=min_reviews,
                    top_n=num_recommendations
                )
                st.session_state.last_recommendations = recommendations

    with col2:
        if st.button("๐ฅ Popular Games", use_container_width=True):
            with st.spinner('Finding popular games...'):
                recommendations = recommender.get_popular_games(top_n=num_recommendations)
                st.session_state.last_recommendations = recommendations

    # Display recommendations, or the overview when no search was performed yet
    if st.session_state.last_recommendations is not None:
        _render_recommendations(st.session_state.last_recommendations)
    else:
        _render_overview(df)


if __name__ == "__main__":
    main()