File size: 1,777 Bytes
c0ab4e3
 
 
b2b7dda
 
 
c0ab4e3
2ef947c
c0ab4e3
 
2ef947c
c0ab4e3
 
b2b7dda
c0ab4e3
 
 
 
2ef947c
 
 
 
 
 
 
 
 
 
b2b7dda
c0ab4e3
 
2ef947c
c0ab4e3
 
 
 
 
 
 
 
 
 
 
 
2ef947c
c0ab4e3
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
from datasets import load_dataset, Dataset

# Hub identifier of the dataset served by this app, and the split that is read.
DATASET_NAME: str = "Thang/wikides"
SPLIT: str = "train"

# Function to fetch data with a limit on demand
def get_data_with_limit(limit: int = 50):
    """
    Return the first ``limit`` rows of the configured dataset as a dictionary.

    The dataset identified by ``DATASET_NAME`` / ``SPLIT`` is loaded on every
    call, the requested row count is capped at the dataset size, and the
    selected rows are converted with ``to_dict()``.

    Args:
        limit: Maximum number of rows to return. Whole-number floats such as
            ``10.0`` are accepted and converted to ``int``, because numeric UI
            inputs may hand the value over as a float. Any other non-integer
            value, or a value below 1, falls back to the default of 50.

    Returns:
        The selected rows as produced by ``to_dict()``. If anything raises
        while loading or slicing, a dict with the keys ``"error"`` and
        ``"message"`` is returned instead of propagating the exception.
    """
    default_limit = 50

    # A whole-number float is a perfectly valid row count; without this
    # conversion the isinstance check below would discard it and silently
    # serve the default instead of what the caller asked for.
    if isinstance(limit, float) and limit.is_integer():
        limit = int(limit)

    # Anything that is still not a positive integer falls back to the default.
    if not isinstance(limit, int) or limit < 1:
        limit = default_limit

    try:
        # Loaded per request instead of being held in a module-level variable.
        # NOTE(review): load_dataset normally reuses its on-disk cache, so
        # repeated calls should not re-download -- confirm for this deployment.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Never ask for more rows than the split contains.
        limit = min(limit, len(dataset))

        return dataset.select(range(limit)).to_dict()
    except Exception as e:
        # API boundary: report the failure as structured data rather than
        # letting the exception escape to the caller.
        return {"error": "Internal Server Error", "message": str(e)}

# Create the Gradio interface
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # precision=0 makes the component round the value and pass it to the
        # callback as an int, which is what get_data_with_limit validates for.
        gr.Number(label="limit", value=50, minimum=1, precision=0)
    ],
    outputs=[
        gr.JSON(label="Data")
    ],
    title="WikiDES Data API",
    description=f"A simple API to access data from the **{DATASET_NAME}** dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
    examples=[
        [10],
        [25],
        [100]
    ],
    allow_flagging="never",
    api_name="data"
)

# Launch the Gradio app
demo.launch()