# Job Listings Dashboard — Streamlit application (HuggingFace Spaces page residue removed).
# Standard library
import io
import math
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

# Third-party
import pandas as pd
import plotly.express as px
import pyarrow as pa
import pyarrow.csv as csv
import pyarrow.feather as feather
import pyarrow.parquet as pq
import streamlit as st
from huggingface_hub import HfApi
# Page configuration: full-width layout with a custom title.
st.set_page_config(layout="wide", page_title="Job Listings Dashboard")

# Dark theme: black app background, styled buttons/selects/tables/charts.
st.markdown("""
<style>
.stApp {
    background-color: #000000;
    color: #FFFFFF;
}
.stButton>button {
    background-color: #4e79a7;
    color: white;
}
.stSelectbox, .stMultiSelect {
    color: #FFFFFF;
}
.stDataFrame {
    background-color: #1E1E1E;
}
.plotly-graph-div {
    background-color: #1E1E1E;
}
.big-font {
    font-size: 48px;
    font-weight: bold;
    text-align: center;
}
</style>
""", unsafe_allow_html=True)

# Centre the main page title.
st.markdown("""
<style>
h1 {
    text-align: center;
}
</style>
""", unsafe_allow_html=True)

# Hugging Face setup — credentials are read from Streamlit secrets,
# dataset name is fixed.
HF_TOKEN = st.secrets["HF_TOKEN"]
HF_USERNAME = st.secrets["HF_USERNAME"]
DATASET_NAME = "jobeasz"

import pyarrow.feather as feather
def load_and_concat_data(year=2025):
    """Download every .feather shard of the private HF dataset, concatenate
    them, and return a cleaned DataFrame of job postings.

    Cleaning steps: keep a fixed column subset, parse dates, restrict to
    postings from `year` (default 2025, matching the original behaviour),
    lowercase title/company/location, strip a trailing ", us"/", usa" from
    locations, and drop exact duplicate rows.

    Returns an empty DataFrame when no shard could be loaded.
    """
    api = HfApi()
    repo_id = f"{HF_USERNAME}/{DATASET_NAME}"
    dataset_files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    feather_files = [f for f in dataset_files if f.endswith('.feather')]

    def download_and_load(file):
        # Best-effort: one corrupt or missing shard must not abort the load.
        try:
            file_content = api.hf_hub_download(
                repo_id=repo_id,
                filename=file,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            return feather.read_feather(file_content)
        except Exception as e:
            print(f"Error loading {file}: {str(e)}")
            return None

    # Download shards in parallel; order of concatenation is completion order.
    all_data = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        future_to_file = {executor.submit(download_and_load, f): f for f in feather_files}
        for future in as_completed(future_to_file):
            df = future.result()
            if df is not None:
                all_data.append(df)

    if not all_data:
        return pd.DataFrame()

    concatenated_df = pd.concat(all_data, ignore_index=True)
    columns_to_keep = [
        'site', 'job_url', 'title', 'company', 'location',
        'job_type', 'date_posted', 'is_remote', 'company_url'
    ]
    filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)

    # Unparseable dates become NaT and are dropped by the year filter below.
    filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
    filtered_df = filtered_df[filtered_df['date_posted'].dt.year == year]

    filtered_df['title'] = filtered_df['title'].str.lower()
    filtered_df['company'] = filtered_df['company'].str.lower()

    def clean_location(location):
        # Lowercase and strip a trailing country suffix (", us" / ", usa").
        if pd.isna(location):
            return location
        location = location.lower()
        return re.sub(r',\s*(us|usa)$', '', location)

    filtered_df['location'] = filtered_df['location'].apply(clean_location)
    return filtered_df.drop_duplicates()
def get_unique_values(df):
    """Return the distinct values of each filterable column, keyed by the
    name the filter widgets in the Data Explorer expect."""
    widget_to_column = {
        'companies': 'company',
        'locations': 'location',
        'job_types': 'job_type',
        'Role_Name': 'title',
        'Date_posted': 'date_posted',
    }
    return {key: df[column].unique() for key, column in widget_to_column.items()}
def prepare_dashboard_data(df):
    """Summarise postings for the dashboard charts.

    Returns (top 10 companies, top 10 locations, top 20 titles,
    postings-per-day frame with columns ['date_posted', 'count']).
    """
    def top_counts(column, limit):
        # Frequency table of the `limit` most common values in `column`.
        return df[column].value_counts().head(limit)

    postings_per_day = df.groupby('date_posted').size().reset_index(name='count')
    return (
        top_counts('company', 10),
        top_counts('location', 10),
        top_counts('title', 20),
        postings_per_day,
    )
def create_chart(data, _x, y, title, color_sequence):
    """Build a bar chart styled for the dark theme (transparent background,
    white text)."""
    figure = px.bar(data, x=_x, y=y, title=title, color_discrete_sequence=color_sequence)
    figure.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font_color='#FFFFFF',
    )
    return figure
def create_time_series(df, time_unit='day'):
    """Line chart of posting volume over time.

    `time_unit='week'` buckets postings by ISO week (labelled at the week
    start); any other value keeps the daily grouping.
    """
    if time_unit == 'week':
        # Group by week, then convert the Period index back to timestamps
        # so plotly can place the points on a date axis.
        df_by_date = df.groupby(df['date_posted'].dt.to_period('W')).size().reset_index(name='count')
        df_by_date['date_posted'] = df_by_date['date_posted'].dt.to_timestamp()
    else:
        df_by_date = df.groupby('date_posted').size().reset_index(name='count')

    fig = px.line(df_by_date, x='date_posted', y='count',
                  title="Job Postings Over Time", color_discrete_sequence=['#4e79a7'])
    fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font_color='#FFFFFF',
        xaxis_title="Date",
        yaxis_title="Number of Job Postings",
    )

    if time_unit == 'week':
        fig.update_xaxes(
            # One tick per week. Plotly date-axis dtick only accepts "M<n>"
            # / "L<f>" string forms or a millisecond interval; "W1" is not
            # valid, so use 7 days in milliseconds.
            dtick=7 * 24 * 60 * 60 * 1000,
            tickformat="%d %b %Y",
            ticklabelmode="period",
        )
    return fig
def display_dashboard(df):
    """Render the overview page: headline metrics plus four summary charts."""
    top_companies, top_locations, top_job_titles, _ = prepare_dashboard_data(df)

    today = datetime.now().date()
    jobs_today = df[df['date_posted'].dt.date == today].shape[0]

    overview_col, companies_col = st.columns(2)
    with overview_col:
        st.subheader("Job Postings Overview")
        st.metric("Total Job Postings", len(df))
        st.metric("Unique Companies", df['company'].nunique())
        st.metric("Job Postings Today", jobs_today)
        earliest = df['date_posted'].min().date()
        latest = df['date_posted'].max().date()
        st.write(f"Job postings from {earliest} to {latest}")
    with companies_col:
        chart = create_chart(top_companies, top_companies.index, top_companies.values,
                             "Top 10 Companies", ['#4e79a7'])
        st.plotly_chart(chart, use_container_width=True)

    # Weekly posting-volume time series, full width.
    st.plotly_chart(create_time_series(df, time_unit='week'), use_container_width=True)

    locations_col, titles_col = st.columns(2)
    with locations_col:
        chart = create_chart(top_locations, top_locations.index, top_locations.values,
                             "Top 10 Locations", ['#f28e2b'])
        st.plotly_chart(chart, use_container_width=True)
    with titles_col:
        chart = create_chart(top_job_titles, top_job_titles.index, top_job_titles.values,
                             "Top 20 Job Titles", ['#59a14f'])
        st.plotly_chart(chart, use_container_width=True)
def filter_dataframe(df, companies, locations, job_types, Role_Name, Date_posted):
    """Apply the Data Explorer's multiselect filters.

    An empty (or falsy) selection leaves the corresponding column
    unfiltered; selections are combined with AND.
    """
    criteria = (
        ('company', companies),
        ('location', locations),
        ('job_type', job_types),
        ('title', Role_Name),
        ('date_posted', Date_posted),
    )
    result = df
    for column, selected in criteria:
        if selected:
            result = result[result[column].isin(selected)]
    return result
def display_data_explorer(df):
    """Render the Data Explorer page: optional multiselect filters plus a
    paginated HTML table with clickable job/company links."""
    st.subheader("Data Explorer")

    show_all = st.radio("Display", ("All Data", "Filtered Data"))
    if show_all == "Filtered Data":
        unique_values = get_unique_values(df)
        col1, col2, col3, col4, col5 = st.columns(5)
        with col1:
            companies = st.multiselect("Select Companies", options=unique_values['companies'])
        with col2:
            locations = st.multiselect("Select Locations", options=unique_values['locations'])
        with col3:
            job_types = st.multiselect("Select Job Types", options=unique_values['job_types'])
        with col4:
            Role_Name = st.multiselect("Select Role Types", options=unique_values['Role_Name'])
        with col5:
            Date_posted = st.multiselect("Select Date Posted", options=unique_values['Date_posted'])
        filtered_df = filter_dataframe(df, companies, locations, job_types, Role_Name, Date_posted)
    else:
        filtered_df = df

    filtered_df = filtered_df.sort_values(by='date_posted', ascending=False)
    st.write(f"Showing {len(filtered_df)} job listings")

    # Pagination. Guard against an empty result set: with zero pages,
    # st.number_input(min_value=1, max_value=0) would raise, so always keep
    # at least one (possibly empty) page.
    items_per_page = 15
    num_pages = max(1, math.ceil(len(filtered_df) / items_per_page))
    col1, col2, col3 = st.columns([1, 3, 1])
    with col2:
        page = st.number_input("Page", min_value=1, max_value=num_pages, value=1)

    start_idx = (page - 1) * items_per_page
    end_idx = start_idx + items_per_page
    # Copy the page slice so rewriting the link columns below does not
    # trigger a SettingWithCopyWarning (and works under copy-on-write).
    page_df = filtered_df.iloc[start_idx:end_idx].copy()

    def make_clickable(url):
        # Render the raw URL as an anchor tag for the HTML table.
        return f'<a href="{url}" target="_blank" style="color: #4e79a7;">Link</a>'

    page_df['job_url'] = page_df['job_url'].apply(make_clickable)
    page_df['company_url'] = page_df['company_url'].apply(make_clickable)
    st.write(page_df.to_html(escape=False, index=False), unsafe_allow_html=True)

    col1, col2, col3 = st.columns([1, 3, 1])
    with col2:
        st.write(f"Page {page} of {num_pages}")
def display_about_page():
    """Render the About page: feature overview, usage guide, data source
    note, and contact links."""
    st.markdown("""
## What is this application?

The Job Listings Dashboard is a powerful tool designed to provide insights into the job market. It offers a comprehensive view of job postings, allowing users to explore trends, top companies, locations, and job titles.

### Key Features:
- **Interactive Dashboard**: Visualize job market trends with dynamic charts and graphs.
- **Data Explorer**: Dive deep into individual job listings with advanced filtering options.
- **Real-time Data**: Fetch the latest job data from our Hugging Face dataset.

## How to use this application

### Dashboard
1. Navigate to the Dashboard using the sidebar.
2. View overall statistics such as total job postings, unique companies, and today's postings.
3. Explore interactive charts showing:
   - Top companies hiring
   - Job postings over time
   - Top locations for job opportunities
   - Most common job titles

### Data Explorer
1. Switch to the Data Explorer using the sidebar.
2. Choose between viewing all data or applying filters.
3. Use the multi-select dropdowns to filter by:
   - Companies
   - Locations
   - Job Types
4. Browse the filtered job listings table.
5. Click on job or company links to view more details on the original posting site.

## Data Source
This application fetches data from my private dataset which scrapes data from various job hosting portals and the data gets updated daily.

## Contact
For questions, feedback, or collaboration opportunities, feel free to reach out:
- LinkedIn: [Nihar Palem](https://www.linkedin.com/in/nihar-palem-1b955a183/)
""")
    # Clickable LinkedIn logo button below the markdown body.
    linkedin_url = "https://www.linkedin.com/in/nihar-palem-1b955a183/"
    st.markdown(f"""
<a href="{linkedin_url}" target="_blank">
    <img src="https://content.linkedin.com/content/dam/me/business/en-us/amp/brand-site/v2/bg/LI-Logo.svg.original.svg" width="100">
</a>
""", unsafe_allow_html=True)
def main():
    """Application entry point: load the dataset once, then route to the
    page chosen in the sidebar."""
    st.title("Job Easz")

    df = load_and_concat_data()
    if df.empty:
        st.error("No data available. Please check your dataset.")
        return

    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Dashboard", "Data Explorer", "About"])

    # Dispatch table over the three sidebar options.
    renderers = {
        "Dashboard": display_dashboard,
        "Data Explorer": display_data_explorer,
        "About": display_about_page,
    }
    if page == "About":
        renderers[page]()
    else:
        renderers[page](df)
