File size: 1,777 Bytes
c0ab4e3 b2b7dda c0ab4e3 2ef947c c0ab4e3 2ef947c c0ab4e3 b2b7dda c0ab4e3 2ef947c b2b7dda c0ab4e3 2ef947c c0ab4e3 2ef947c c0ab4e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
from datasets import load_dataset, Dataset
# Define the dataset name and split.
# DATASET_NAME is the dataset identifier handed to `load_dataset` on each request.
DATASET_NAME: str = "Thang/wikides"
# SPLIT is the value passed as `split=` when the dataset is loaded.
SPLIT: str = "train"
# Fallback row count used when the caller supplies no usable limit.
_DEFAULT_LIMIT = 50


def _coerce_limit(value, default: int = _DEFAULT_LIMIT) -> int:
    """Return *value* as a positive int, or *default* when it is unusable."""
    if not isinstance(value, (int, float)):
        return default
    try:
        # Fractional input is truncated (10.0 -> 10, 10.7 -> 10).
        count = int(value)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-inf.
        return default
    return count if count >= 1 else default


# Function to fetch data with a limit on demand
def get_data_with_limit(limit: int = _DEFAULT_LIMIT) -> dict:
    """
    Load the dataset and return its first ``limit`` rows as a dictionary.

    Args:
        limit: Maximum number of rows to return. Integers and floats are
            accepted (a UI number field may deliver ``10.0`` rather than
            ``10``); floats are truncated. Anything else, or a value below 1,
            falls back to the default of 50. Values larger than the dataset
            are clamped to the dataset size.

    Returns:
        The result of ``Dataset.to_dict()`` for the selected rows, or, if
        anything fails while loading or slicing, a dictionary with the keys
        ``"error"`` and ``"message"`` describing the failure.
    """
    limit = _coerce_limit(limit)

    try:
        # The dataset is loaded on every call rather than kept in a
        # module-level variable. NOTE(review): `load_dataset` may still reuse
        # its own on-disk cache between calls - confirm if disk usage matters.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Never ask for more rows than the dataset actually has.
        limit = min(limit, len(dataset))

        # Take the first `limit` rows and convert them to a plain dictionary.
        return dataset.select(range(limit)).to_dict()
    except Exception as e:
        # Top-level API boundary: report the failure as a structured payload
        # instead of letting the exception propagate to the UI.
        return {"error": "Internal Server Error", "message": str(e)}
# Create the Gradio interface
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # A row count is a whole number: precision=0 makes Gradio round the
        # field and hand the callback an int instead of a float.
        gr.Number(label="limit", value=50, minimum=1, precision=0),
    ],
    outputs=[
        gr.JSON(label="Data"),
    ],
    title="WikiDES Data API",
    description=f"A simple API to access data from the **{DATASET_NAME}** dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
    # Sample values offered for the `limit` input.
    examples=[
        [10],
        [25],
        [100],
    ],
    allow_flagging="never",
    api_name="data",
)

# Launch the Gradio app
demo.launch()