| import json | |
| import os | |
| import urllib.parse | |
| import gradio as gr | |
| import requests | |
| from gradio_huggingfacehub_search import HuggingfaceHubSearch | |
| from huggingface_hub import InferenceClient | |
# Sample value taken from the search component; not referenced again in the
# visible part of this file (presumably kept for demos/tests -- verify).
example = HuggingfaceHubSearch().example_value()

# Shared Inference API client used by query_dataset(). Reading HF_TOKEN with
# os.environ[...] raises KeyError at import time when the variable is unset.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ["HF_TOKEN"],
)
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML for an embedded Hugging Face dataset viewer.

    Args:
        hub_repo_id: Dataset repository id (for example ``"user/dataset"``).
        sql_query: Optional SQL text. When given, the viewer URL enables the
            SQL console and carries the query in the ``sql`` parameter.

    Returns:
        An ``<iframe>`` HTML snippet pointing at the dataset viewer, or an
        empty string when no repository id was supplied.
    """
    if not hub_repo_id:
        # Nothing selected: do not embed a viewer for a malformed URL.
        return ""

    # The repo id is user-supplied and ends up inside an HTML attribute, so
    # percent-encode it ("/" is kept as the owner/name separator). This keeps
    # quotes and angle brackets from breaking out of src="...". Well-formed
    # ids contain only safe characters and are therefore left unchanged.
    repo_path = urllib.parse.quote(str(hub_repo_id), safe="/")
    url = f"https://huggingface.co/datasets/{repo_path}/embed/viewer"

    if sql_query:
        encoded_sql = urllib.parse.quote(sql_query)
        url = f"{url}?sql_console=true&sql={encoded_sql}"

    return f"""
<iframe
  src="{url}"
  frameborder="0"
  width="100%"
  height="800px"
></iframe>
"""
def get_column_info(hub_repo_id):
    """Fetch the feature (column) description of a Hub dataset as JSON text.

    Queries the datasets-server ``/info`` endpoint and serialises the
    ``features`` entry of the first configuration listed under
    ``dataset_info``.

    Args:
        hub_repo_id: Dataset repository id (for example ``"user/dataset"``).

    Returns:
        A JSON string describing the dataset's features.

    Raises:
        gr.Error: If the request fails or the response carries no usable
            ``dataset_info``. The error is raised (not merely constructed)
            so that Gradio reports it to the user.
    """
    try:
        # `params` URL-encodes the repo id; the timeout keeps a stalled
        # request from blocking the UI callback indefinitely.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        data = response.json()
        dataset_info = data.get("dataset_info")
        if not dataset_info:
            # Prefer the server's own explanation when it provides one.
            raise ValueError(data.get("error", "no dataset_info in response"))
        # Use the first configuration, in the order the server returned them.
        first_config = next(iter(dataset_info.values()))
        return json.dumps(first_config.get("features"))
    except Exception as e:
        # UI boundary: turn any failure into a user-visible Gradio error.
        raise gr.Error(f"Error getting column info: {e}") from e
def query_dataset(hub_repo_id, features, query):
    """Turn a natural-language question into SQL and embed the result viewer.

    Sends the dataset features and the user's question to the module-level
    chat ``client`` and takes the first choice's message content as the SQL.

    Args:
        hub_repo_id: Dataset repository id passed on to ``get_iframe``.
        features: Feature description text inserted into the prompt.
        query: The user's natural-language question.

    Returns:
        A ``(sql, iframe_html)`` tuple: the model's reply and the viewer
        iframe built from it.
    """
    system_prompt = "You are a helpful assistant that returns a DuckDB SQL query based on the user's query and dataset features. Only return the SQL query, no other text."
    user_prompt = f"""table train
# Features
{features}
# Query
{query}
"""
    completion = client.chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=1000,
        stream=False,
    )
    generated_sql = completion.choices[0].message.content
    iframe_html = get_iframe(hub_repo_id, generated_sql)
    return generated_sql, iframe_html
# Gradio UI: pick a dataset, preview it in the embedded viewer, then turn a
# natural-language question into SQL and show the result in the same viewer.
# NOTE(review): the nesting of widgets inside rows/columns was reconstructed
# from a copy of the file that had lost its indentation -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("""# 🔥 🦆 🤗 Text To SQL Hub Datasets 🔥 🦆 🤗
This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
""")
    with gr.Row():
        with gr.Column():
            search_in = HuggingfaceHubSearch(
                label="Search Huggingface Hub",
                # The component searches datasets (see search_type), so the
                # hint text says "datasets" rather than "models".
                placeholder="Search for datasets on Huggingface",
                search_type="dataset",
            )
            btn = gr.Button("Show Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
    with gr.Row():
        # Hidden holder for the features JSON that feeds the prompt.
        features = gr.Code(label="Features", language="json", visible=False)
    with gr.Row():
        query = gr.Textbox(
            label="Natural Language Query",
            placeholder="Enter a natural language query to generate SQL",
        )
    with gr.Row():
        sql_out = gr.Code(label="SQL Query")
    with gr.Row():
        btn2 = gr.Button("Query Dataset")

    # Show the viewer on button click or search submit, then load the
    # dataset's column info into the hidden features component.
    gr.on(
        [btn.click, search_in.submit],
        fn=get_iframe,
        inputs=[search_in],
        outputs=[search_out],
    ).then(
        fn=get_column_info,
        inputs=[search_in],
        outputs=[features],
    )

    # Generate SQL from the question and refresh the viewer with it.
    btn2.click(
        fn=query_dataset,
        inputs=[search_in, features, query],
        outputs=[sql_out, search_out],
    )

if __name__ == "__main__":
    demo.launch()