Spaces:
Sleeping
Sleeping
Render notebook as HTML instead
Browse files- app.py +23 -33
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from utils.notebook_utils import (
|
|
| 13 |
)
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
import os
|
|
|
|
| 16 |
|
| 17 |
# TODOs:
|
| 18 |
# Improve UI code preview
|
|
@@ -64,6 +65,9 @@ def create_notebook_file(cells, notebook_name):
|
|
| 64 |
with open(notebook_name, "w") as f:
|
| 65 |
nbf.write(nb, f)
|
| 66 |
logging.info(f"Notebook {notebook_name} created successfully")
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
|
|
@@ -96,15 +100,15 @@ def longest_string_column(df):
|
|
| 96 |
|
| 97 |
|
| 98 |
def generate_eda_cells(dataset_id):
|
| 99 |
-
|
| 100 |
|
| 101 |
|
| 102 |
def generate_rag_cells(dataset_id):
|
| 103 |
-
|
| 104 |
|
| 105 |
|
| 106 |
def generate_embedding_cells(dataset_id):
|
| 107 |
-
|
| 108 |
|
| 109 |
|
| 110 |
def _push_to_hub(
|
|
@@ -135,20 +139,18 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
|
|
| 135 |
except Exception as err:
|
| 136 |
gr.Error("Unable to retrieve dataset info from HF Hub.")
|
| 137 |
logging.error(f"Failed to fetch compatible libraries: {err}")
|
| 138 |
-
return
|
| 139 |
|
| 140 |
if not libraries:
|
| 141 |
logging.error(f"Dataset not compatible with pandas library - not libraries")
|
| 142 |
-
|
| 143 |
-
return
|
| 144 |
pandas_library = next(
|
| 145 |
(lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
|
| 146 |
None,
|
| 147 |
)
|
| 148 |
if not pandas_library:
|
| 149 |
logging.error("Dataset not compatible with pandas library - not pandas library")
|
| 150 |
-
|
| 151 |
-
return
|
| 152 |
first_config_loading_code = pandas_library["loading_codes"][0]
|
| 153 |
first_code = first_config_loading_code["code"]
|
| 154 |
first_config = first_config_loading_code["config_name"]
|
|
@@ -166,48 +168,38 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
|
|
| 166 |
logging.error(
|
| 167 |
"Dataset does not have categorical columns, which are required for RAG generation."
|
| 168 |
)
|
| 169 |
-
|
| 170 |
"",
|
| 171 |
"## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
|
| 172 |
)
|
| 173 |
-
return
|
| 174 |
if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
|
| 175 |
logging.error(
|
| 176 |
"Dataset does not have categorical or numeric columns, which are required for EDA generation."
|
| 177 |
)
|
| 178 |
-
|
| 179 |
"",
|
| 180 |
"## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
|
| 181 |
)
|
| 182 |
-
return
|
| 183 |
|
| 184 |
cells = replace_wildcards(
|
| 185 |
cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
|
| 186 |
)
|
| 187 |
-
|
| 188 |
-
# Show only the first 30 lines, would like to have a scroll in gr.Code https://github.com/gradio-app/gradio/issues/9192
|
| 189 |
-
for cell in cells:
|
| 190 |
-
if cell["cell_type"] == "markdown":
|
| 191 |
-
continue
|
| 192 |
-
generated_text += cell["source"] + "\n\n"
|
| 193 |
-
yield generated_text, ""
|
| 194 |
-
if generated_text.count("\n") > 30:
|
| 195 |
-
generated_text += (
|
| 196 |
-
f"## See more lines available in the generated notebook 🤗 ......"
|
| 197 |
-
)
|
| 198 |
-
yield generated_text, ""
|
| 199 |
-
break
|
| 200 |
notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
|
| 201 |
-
create_notebook_file(cells, notebook_name=notebook_name)
|
| 202 |
_push_to_hub(dataset_id, notebook_name)
|
| 203 |
notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
f"##
|
| 207 |
)
|
| 208 |
|
| 209 |
|
| 210 |
-
with gr.Blocks(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
gr.Markdown("# 🤗 Dataset notebook creator 🕵️")
|
| 212 |
with gr.Row(equal_height=True):
|
| 213 |
with gr.Column(scale=2):
|
|
@@ -262,9 +254,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 262 |
)
|
| 263 |
|
| 264 |
with gr.Column(scale=2):
|
| 265 |
-
code_component = gr.
|
| 266 |
-
language="python", label="Notebook Code Preview", lines=40
|
| 267 |
-
)
|
| 268 |
go_to_notebook = gr.Markdown("", visible=True)
|
| 269 |
|
| 270 |
generate_eda_btn.click(
|
|
|
|
| 13 |
)
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
import os
|
| 16 |
+
from nbconvert import HTMLExporter
|
| 17 |
|
| 18 |
# TODOs:
|
| 19 |
# Improve UI code preview
|
|
|
|
| 65 |
with open(notebook_name, "w") as f:
|
| 66 |
nbf.write(nb, f)
|
| 67 |
logging.info(f"Notebook {notebook_name} created successfully")
|
| 68 |
+
html_exporter = HTMLExporter()
|
| 69 |
+
html_data, _ = html_exporter.from_notebook_node(nb)
|
| 70 |
+
return html_data
|
| 71 |
|
| 72 |
|
| 73 |
def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
|
|
|
|
| 100 |
|
| 101 |
|
| 102 |
def generate_eda_cells(dataset_id):
|
| 103 |
+
return generate_cells(dataset_id, eda_cells, "eda")
|
| 104 |
|
| 105 |
|
| 106 |
def generate_rag_cells(dataset_id):
|
| 107 |
+
return generate_cells(dataset_id, rag_cells, "rag")
|
| 108 |
|
| 109 |
|
| 110 |
def generate_embedding_cells(dataset_id):
|
| 111 |
+
return generate_cells(dataset_id, embeggins_cells, "embeddings")
|
| 112 |
|
| 113 |
|
| 114 |
def _push_to_hub(
|
|
|
|
| 139 |
except Exception as err:
|
| 140 |
gr.Error("Unable to retrieve dataset info from HF Hub.")
|
| 141 |
logging.error(f"Failed to fetch compatible libraries: {err}")
|
| 142 |
+
return "", "## ❌ This dataset is not accessible from the Hub ❌"
|
| 143 |
|
| 144 |
if not libraries:
|
| 145 |
logging.error(f"Dataset not compatible with pandas library - not libraries")
|
| 146 |
+
return "", "## ❌ This dataset is not compatible with pandas library ❌"
|
|
|
|
| 147 |
pandas_library = next(
|
| 148 |
(lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
|
| 149 |
None,
|
| 150 |
)
|
| 151 |
if not pandas_library:
|
| 152 |
logging.error("Dataset not compatible with pandas library - not pandas library")
|
| 153 |
+
return "", "## ❌ This dataset is not compatible with pandas library ❌"
|
|
|
|
| 154 |
first_config_loading_code = pandas_library["loading_codes"][0]
|
| 155 |
first_code = first_config_loading_code["code"]
|
| 156 |
first_config = first_config_loading_code["config_name"]
|
|
|
|
| 168 |
logging.error(
|
| 169 |
"Dataset does not have categorical columns, which are required for RAG generation."
|
| 170 |
)
|
| 171 |
+
return (
|
| 172 |
"",
|
| 173 |
"## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
|
| 174 |
)
|
|
|
|
| 175 |
if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
|
| 176 |
logging.error(
|
| 177 |
"Dataset does not have categorical or numeric columns, which are required for EDA generation."
|
| 178 |
)
|
| 179 |
+
return (
|
| 180 |
"",
|
| 181 |
"## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
|
| 182 |
)
|
|
|
|
| 183 |
|
| 184 |
cells = replace_wildcards(
|
| 185 |
cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
|
| 186 |
)
|
| 187 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
|
| 189 |
+
html_content = create_notebook_file(cells, notebook_name=notebook_name)
|
| 190 |
_push_to_hub(dataset_id, notebook_name)
|
| 191 |
notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
|
| 192 |
+
return (
|
| 193 |
+
html_content,
|
| 194 |
+
f"## 🎉 Ready to explore? Play and run the generated notebook 👉 [here]({notebook_link})!",
|
| 195 |
)
|
| 196 |
|
| 197 |
|
| 198 |
+
with gr.Blocks(
|
| 199 |
+
fill_height=True,
|
| 200 |
+
fill_width=True,
|
| 201 |
+
css="#box { height: 650px; overflow-y: scroll !important}",
|
| 202 |
+
) as demo:
|
| 203 |
gr.Markdown("# 🤗 Dataset notebook creator 🕵️")
|
| 204 |
with gr.Row(equal_height=True):
|
| 205 |
with gr.Column(scale=2):
|
|
|
|
| 254 |
)
|
| 255 |
|
| 256 |
with gr.Column(scale=2):
|
| 257 |
+
code_component = gr.HTML(elem_id="box")
|
|
|
|
|
|
|
| 258 |
go_to_notebook = gr.Markdown("", visible=True)
|
| 259 |
|
| 260 |
generate_eda_btn.click(
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ huggingface_hub
|
|
| 3 |
nbformat
|
| 4 |
httpx
|
| 5 |
outlines
|
| 6 |
-
python-dotenv
|
|
|
|
|
|
| 3 |
nbformat
|
| 4 |
httpx
|
| 5 |
outlines
|
| 6 |
+
python-dotenv
|
| 7 |
+
nbconvert
|