Spaces:

asoria
/

auto-notebook-creator

Sleeping

App Files Community

asoria committed on Sep 5, 2024

Commit

90bcf2d

1 Parent(s): 117da13

Render notebook as HTML instead

Browse files

Files changed (2) hide show

app.py +23 -33
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from utils.notebook_utils import (
 )
 from dotenv import load_dotenv
 import os
 # TODOs:
 # Improve UI code preview
@@ -64,6 +65,9 @@ def create_notebook_file(cells, notebook_name):
     with open(notebook_name, "w") as f:
         nbf.write(nb, f)
     logging.info(f"Notebook {notebook_name} created successfully")
 def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
@@ -96,15 +100,15 @@ def longest_string_column(df):
 def generate_eda_cells(dataset_id):
-    yield from generate_cells(dataset_id, eda_cells, "eda")
 def generate_rag_cells(dataset_id):
-    yield from generate_cells(dataset_id, rag_cells, "rag")
 def generate_embedding_cells(dataset_id):
-    yield from generate_cells(dataset_id, embeggins_cells, "embeddings")
 def _push_to_hub(
@@ -135,20 +139,18 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
     except Exception as err:
         gr.Error("Unable to retrieve dataset info from HF Hub.")
         logging.error(f"Failed to fetch compatible libraries: {err}")
-        return []
     if not libraries:
         logging.error(f"Dataset not compatible with pandas library - not libraries")
-        yield "", "## ❌ This dataset is not compatible with pandas library ❌"
-        return
     pandas_library = next(
         (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
         None,
     )
     if not pandas_library:
         logging.error("Dataset not compatible with pandas library - not pandas library")
-        yield "", "## ❌ This dataset is not compatible with pandas library ❌"
-        return
     first_config_loading_code = pandas_library["loading_codes"][0]
     first_code = first_config_loading_code["code"]
     first_config = first_config_loading_code["config_name"]
@@ -166,48 +168,38 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
         logging.error(
             "Dataset does not have categorical columns, which are required for RAG generation."
         )
-        yield (
             "",
             "## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
         )
-        return
     if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
         logging.error(
             "Dataset does not have categorical or numeric columns, which are required for EDA generation."
         )
-        yield (
             "",
             "## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
         )
-        return
     cells = replace_wildcards(
         cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
     )
-    generated_text = ""
-    # Show only the first 30 lines, would like to have a scroll in gr.Code https://github.com/gradio-app/gradio/issues/9192
-    for cell in cells:
-        if cell["cell_type"] == "markdown":
-            continue
-        generated_text += cell["source"] + "\n\n"
-        yield generated_text, ""
-        if generated_text.count("\n") > 30:
-            generated_text += (
-                f"## See more lines available in the generated notebook 🤗 ......"
-            )
-            yield generated_text, ""
-            break
     notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
-    create_notebook_file(cells, notebook_name=notebook_name)
     _push_to_hub(dataset_id, notebook_name)
     notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
-    yield (
-        generated_text,
-        f"## ✅ Here you have the [generated notebook]({notebook_link}) ✅",
     )
-with gr.Blocks(fill_height=True, fill_width=True) as demo:
     gr.Markdown("# 🤖 Dataset notebook creator 🕵️")
     with gr.Row(equal_height=True):
         with gr.Column(scale=2):
@@ -262,9 +254,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
                 )
         with gr.Column(scale=2):
-            code_component = gr.Code(
-                language="python", label="Notebook Code Preview", lines=40
-            )
             go_to_notebook = gr.Markdown("", visible=True)
     generate_eda_btn.click(

 )
 from dotenv import load_dotenv
 import os
+from nbconvert import HTMLExporter
 # TODOs:
 # Improve UI code preview
     with open(notebook_name, "w") as f:
         nbf.write(nb, f)
     logging.info(f"Notebook {notebook_name} created successfully")
+    html_exporter = HTMLExporter()
+    html_data, _ = html_exporter.from_notebook_node(nb)
+    return html_data
 def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
 def generate_eda_cells(dataset_id):
+    return generate_cells(dataset_id, eda_cells, "eda")
 def generate_rag_cells(dataset_id):
+    return generate_cells(dataset_id, rag_cells, "rag")
 def generate_embedding_cells(dataset_id):
+    return generate_cells(dataset_id, embeggins_cells, "embeddings")
 def _push_to_hub(
     except Exception as err:
         gr.Error("Unable to retrieve dataset info from HF Hub.")
         logging.error(f"Failed to fetch compatible libraries: {err}")
+        return "", "## ❌ This dataset is not accessible from the Hub ❌"
     if not libraries:
         logging.error(f"Dataset not compatible with pandas library - not libraries")
+        return "", "## ❌ This dataset is not compatible with pandas library ❌"
     pandas_library = next(
         (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
         None,
     )
     if not pandas_library:
         logging.error("Dataset not compatible with pandas library - not pandas library")
+        return "", "## ❌ This dataset is not compatible with pandas library ❌"
     first_config_loading_code = pandas_library["loading_codes"][0]
     first_code = first_config_loading_code["code"]
     first_config = first_config_loading_code["config_name"]
         logging.error(
             "Dataset does not have categorical columns, which are required for RAG generation."
         )
+        return (
             "",
             "## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
         )
     if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
         logging.error(
             "Dataset does not have categorical or numeric columns, which are required for EDA generation."
         )
+        return (
             "",
             "## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
         )
     cells = replace_wildcards(
         cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
     )
     notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
+    html_content = create_notebook_file(cells, notebook_name=notebook_name)
     _push_to_hub(dataset_id, notebook_name)
     notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
+    return (
+        html_content,
+        f"## 🎉 Ready to explore? Play and run the generated notebook 👉 [here]({notebook_link})!",
     )
+with gr.Blocks(
+    fill_height=True,
+    fill_width=True,
+    css="#box { height: 650px; overflow-y: scroll !important}",
+) as demo:
     gr.Markdown("# 🤖 Dataset notebook creator 🕵️")
     with gr.Row(equal_height=True):
         with gr.Column(scale=2):
                 )
         with gr.Column(scale=2):
+            code_component = gr.HTML(elem_id="box")
             go_to_notebook = gr.Markdown("", visible=True)
     generate_eda_btn.click(

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ huggingface_hub
 nbformat
 httpx
 outlines
-python-dotenv

 nbformat
 httpx
 outlines
+python-dotenv
+nbconvert