Spaces:
Build error
Build error
| import pandas as pd | |
| import ydata_profiling | |
| import gradio as gr | |
| from pydantic_settings import BaseSettings | |
| from tempfile import NamedTemporaryFile | |
| import sweetviz as sv | |
| import sketch | |
| #from datatile.summary.df import DataFrameSummary | |
| from autoviz.AutoViz_Class import AutoViz_Class | |
| from traceml.summary.df import DataFrameSummary | |
| import os | |
| df1=gr.State() | |
| def variable_table(df): | |
| """ | |
| Analyzes a DataFrame and categorizes variables with colorful HTML formatting. | |
| Args: | |
| df (pandas.DataFrame): The DataFrame to analyze. | |
| Returns: | |
| str: HTML code representing the analysis results with colorful highlights. | |
| """ | |
| # Analyze variable types | |
| numerical_vars=[] | |
| text_vars=[] | |
| categorical_vars=[] | |
| # Identify categorical, numerical, and text columns | |
| # Iterate over columns | |
| for col in df.columns: | |
| print(col ) | |
| unique_values = df[col].nunique() | |
| total_values = df[col].count() | |
| dtype_kind = df[col].dtype.kind | |
| print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind) | |
| # Check criteria for numerical, text, and categorical variables | |
| if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10: | |
| numerical_vars.append(col) | |
| print('Numerical') | |
| elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10: | |
| text_vars.append(col) | |
| print('Text') | |
| else: | |
| categorical_vars.append(col) | |
| print('categorical') | |
| print(categorical_vars,numerical_vars,text_vars) | |
| gr.Info(f"Column Analysis completed. {len(categorical_vars)} categorical variables , {len(numerical_vars)} numerical variables , {len(text_vars)} Text variables found!") | |
| # Build HTML table with styles | |
| table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;' | |
| header_style = 'background-color: #f2f2f2; padding: 5px 10px;' | |
| data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;' | |
| category_color = '#90ee90' # Light green for categorical | |
| numerical_color = '#add8e6' # Light blue for numerical | |
| text_color = '#ffd9b3' # Light yellow for text | |
| html = f"<table style='{table_style}'>" | |
| html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>" | |
| # Add rows for each variable type with coloring | |
| if categorical_vars: | |
| html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>" | |
| if numerical_vars: | |
| html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>" | |
| if text_vars: | |
| html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>" | |
| # Handle cases where no variables are found | |
| if not (categorical_vars or numerical_vars or text_vars): | |
| html += "<tr><td>No variables found!</td></tr>" | |
| html += "</table>" | |
| return html | |
| def generate_report(file, type): | |
| df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file) | |
| pandas_html_report =ydata_profiling.ProfileReport(df).to_html() | |
| temp_file1 = NamedTemporaryFile(delete=False, suffix=".html") | |
| temp_file1.write(pandas_html_report.encode('utf-8')) | |
| temp_file1.close() | |
| print('Pandas completed') | |
| gr.Info('Pandas Profiling report completed') | |
| # dataprep_report = create_report(df) | |
| # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html") | |
| # temp_file2.write(dataprep_report.encode('utf-8')) | |
| # temp_file2.close() | |
| def convert_mixed_to_str(df): | |
| mixed_cols = df.select_dtypes(include=['object']).columns # Detect object dtype (usually mixed) | |
| for col in mixed_cols: | |
| df[col] = df[col].astype(str) # Convert mixed columns to strings | |
| return df | |
| df = convert_mixed_to_str(df.copy()) | |
| df.columns = df.columns.str.upper() | |
| df1.value=df | |
| sweetviz_report = sv.analyze(df) | |
| #sweetviz_report=sweetviz_report.show_html() | |
| #print(type(sweetviz_report)) | |
| report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html', | |
| open_browser=False, | |
| layout='widescreen', | |
| scale=None) | |
| HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") | |
| # Reading the file and storing in a variable | |
| contents = HTMLFileToBeOpened.read() | |
| print('sweetviz completed') | |
| gr.Info('Sweetviz report completed') | |
| temp_file3 = NamedTemporaryFile(delete=False, suffix=".html") | |
| temp_file3.write(contents.encode('utf-8')) | |
| temp_file3.close() | |
| dfviz=df | |
| try: | |
| AV = AutoViz_Class() | |
| gr.Info('Starting Autoviz ..it may take a while..pls be patient') | |
| dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1, | |
| lowess=False,chart_format='html',max_rows_analyzed=150000, | |
| max_cols_analyzed=30,save_plot_dir=None) | |
| reports = [] | |
| print(os.listdir("./")) | |
| output_dir_html="./AutoViz_Plots/AutoViz" | |
| for filename in os.listdir(output_dir_html): | |
| if filename.endswith(".html"): | |
| title = os.path.splitext(filename)[0] # Extract title from filename (without extension) | |
| reports.append(title) | |
| reports.append(output_dir_html+'/'+filename) | |
| print(reports) | |
| dft = AV.AutoViz(filename='', sep=',', header=0, verbose=2,dfte=df, | |
| lowess=False,chart_format='png',max_rows_analyzed=150000, | |
| max_cols_analyzed=30,save_plot_dir=None) | |
| imgs = [] | |
| output_dir_img="./AutoViz_Plots/AutoViz" | |
| for filename in os.listdir(output_dir_img): | |
| if filename.endswith(".png"): | |
| title = os.path.splitext(filename)[0] # Extract title from filename (without extension) | |
| imgs.append(title) | |
| imgs.append(output_dir_img+'/'+filename) | |
| print(imgs) | |
| except Exception as e: | |
| dfviz=df | |
| print(e) | |
| print('Exception during Autoviz') | |
| while len(reports)<10: | |
| # Comparing the lengths and appending t and m to output if needed | |
| t = gr.Markdown(visible=False) | |
| m = gr.File(visible=False) | |
| reports.append(t) | |
| reports.append(m) | |
| while len(imgs)<10: | |
| # Comparing the lengths and appending t and m to output if needed | |
| t = gr.Markdown(visible=False) | |
| m = gr.Image(visible=False) | |
| imgs.append(t) | |
| imgs.append(m) | |
| dfs = DataFrameSummary(df) | |
| sd=dfs.summary() | |
| sd.index.name = 'PARAMETERS' | |
| sd1= sd.reset_index(drop=False) | |
| def highlight_cols(x): | |
| df = x.copy() | |
| df.loc[:, :] = 'color: purple' | |
| df[['PARAMETERS']] = 'color: red' | |
| return df | |
| # Applying the style function | |
| sd1 = sd1.style.apply(highlight_cols, axis = None) | |
| sd1.columns = sd1.columns.str.upper() | |
| return [temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs | |
| def query_chat(query): | |
| df=df1.value | |
| print(df.sketch.ask(query)) | |
| return df.sketch.ask(query) | |
| def handle_click(file, sheet_name): | |
| print('Handle click') | |
| results = generate_report(file, sheet_name) | |
| return results | |
| def update_sheets(file): | |
| sheet_names = pd.ExcelFile(file).sheet_names | |
| print('Sheet updated') | |
| sheet_dropdown = gr.Dropdown( | |
| label="Select sheet", choices=[sheet_names], visible=True # Initially disabled | |
| ) | |
| return sheet_dropdown | |
| with gr.Blocks() as cluster: | |
| with gr.Column(): | |
| gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""") | |
| with gr.Row(): | |
| file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file") | |
| btn=gr.Button(value="Download Report") | |
| with gr.Row(): | |
| gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""") | |
| out1=gr.File(label="Download CSV") | |
| gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""") | |
| out2=gr.File(label="Download CSV") | |
| sheet_dropdown = gr.Dropdown( | |
| label="Select sheet", choices=[], visible=False # Initially disabled | |
| ) | |
| with gr.Column(): | |
| gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""") | |
| #gr.Markdown("***Uploaded File***") | |
| var=gr.HTML() | |
| with gr.Column(): | |
| gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""") | |
| #gr.Markdown("#Uploaded File") | |
| dataframe1=gr.Dataframe() | |
| with gr.Column(): | |
| gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""") | |
| #gr.Markdown("#Columns Analysis") | |
| dataframe2=gr.Dataframe() | |
| with gr.Column(): | |
| gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"ASK QUESTIONS ON THE COLUMNS </h2>""") | |
| with gr.Row(): | |
| query=gr.Textbox(label="Enter any question on data columns", | |
| info="Natural language query", | |
| lines=1, | |
| value="What is maximum value of first column") | |
| #outlabel=gr.HighlightedText(label='Query Result ') | |
| outbox=gr.Textbox(label='Query Result') | |
| query_btn=gr.Button(value='Submit Query') | |
| query_btn.click(query_chat,inputs=query,outputs=outbox) | |
| #Autoviz insights graphs | |
| gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight Visualisations</h3>""") | |
| with gr.Column(): | |
| plots2 = [] | |
| for i in range(5): | |
| t = gr.Markdown() | |
| with gr.Column(scale=1): | |
| m=gr.Image() | |
| plots2.append(t) | |
| plots2.append(m) | |
| print(type(plots2[0])) | |
| gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight graphs</h3>""") | |
| with gr.Row(): | |
| plots1 = [] | |
| for i in range(5): | |
| with gr.Row(): | |
| t = gr.Markdown() | |
| m=gr.File() | |
| plots1.append(t) | |
| plots1.append(m) | |
| print(type(plots1[0])) | |
| btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2) | |
| file.change(update_sheets, inputs=file, outputs=sheet_dropdown) | |
| #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2]) | |
| cluster.launch(debug=True) | |