# wikides / app.py
# Last commit 2ef947c (verified) by timtjoe:
# "remove data persist feature from app.py"
import gradio as gr
from datasets import load_dataset, Dataset
# Dataset identifier and split passed to `load_dataset` on every request.
DATASET_NAME: str = "Thang/wikides"
SPLIT: str = "train"
# Function to fetch data with a limit on demand
def get_data_with_limit(limit: int = 50):
    """
    Load the dataset and return its first ``limit`` rows as a dictionary.

    Args:
        limit: Maximum number of rows to return. Whole-number floats such
            as ``10.0`` are accepted and converted to ``int``, because
            Gradio's ``Number`` component passes a float unless it is
            configured with ``precision=0``. Any other invalid value
            (``None``, a fractional or non-numeric value, or a number
            below 1) falls back to 50.

    Returns:
        The selected rows converted with ``Dataset.to_dict()``, or
        ``{"error": "Internal Server Error", "message": <text>}`` if
        loading or slicing the dataset raises an exception.
    """
    default_limit = 50

    # Accept integral floats (10.0 -> 10); inf/nan/fractions are not
    # integers and are rejected by the validation below.
    if isinstance(limit, float) and limit.is_integer():
        limit = int(limit)

    # Type and value checking for the limit parameter
    if not isinstance(limit, int) or limit < 1:
        limit = default_limit

    try:
        # Load on demand instead of keeping the dataset in a module-level
        # variable. `load_dataset` only downloads when the data is not
        # already present in its local cache.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Ensure the limit doesn't exceed the dataset size
        limit = min(limit, len(dataset))

        # Take the first `limit` rows and convert them to a dictionary
        return dataset.select(range(limit)).to_dict()
    except Exception as e:
        # API boundary: report the failure as a structured payload rather
        # than letting the exception propagate to the Gradio handler.
        return {"error": "Internal Server Error", "message": str(e)}
# Create the Gradio interface: one numeric input ("limit") mapped to a JSON
# output, exposed under the API name "data".
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # precision=0 makes Gradio round the input and pass it to the handler
        # as an int rather than a float, matching the handler's `limit: int`
        # parameter.
        gr.Number(label="limit", value=50, minimum=1, precision=0),
    ],
    outputs=[
        gr.JSON(label="Data"),
    ],
    title="WikiDES Data API",
    description=f"A simple API to access data from the **{DATASET_NAME}** dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
    examples=[
        [10],
        [25],
        [100],
    ],
    allow_flagging="never",
    api_name="data",
)
# Launch the Gradio app
demo.launch()