| import gradio as gr | |
| from datasets import load_dataset, Dataset | |
# Dataset identifier and split name handed to ``load_dataset`` on every request.
DATASET_NAME: str = "Thang/wikides"
SPLIT: str = "train"
# Function to fetch data with a limit on demand
def get_data_with_limit(limit: int = 50):
    """Return the first ``limit`` rows of the configured dataset split.

    Args:
        limit: Maximum number of rows to return. Integral floats such as
            ``10.0`` are accepted and converted to ``int``. Any other
            non-integer value, or a value below 1, falls back to 50.
            Values larger than the dataset are clamped to its size.

    Returns:
        On success, the selected rows as produced by ``Dataset.to_dict()``.
        On any failure, a dict with the keys ``"error"`` and ``"message"``
        describing what went wrong.
    """
    # Gradio's Number component passes a float (e.g. 10.0) unless it is
    # configured with precision=0. Without this conversion every request
    # coming from the UI would fail the integer check below and silently
    # be served with the default of 50 rows.
    if isinstance(limit, float) and limit.is_integer():
        limit = int(limit)

    # Anything that is still not a positive integer gets the default.
    if not isinstance(limit, int) or limit < 1:
        limit = 50

    try:
        # Load the dataset on each call instead of keeping it in memory
        # between requests. ``load_dataset`` reuses the local datasets cache
        # when one is present, so only the first call needs to download.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Never ask for more rows than the split actually contains.
        limit = min(limit, len(dataset))

        # Take the leading rows and convert them to a plain dictionary.
        return dataset.select(range(limit)).to_dict()
    except Exception as e:
        # Top-level boundary of the API: report the failure as structured
        # data rather than letting the exception propagate to the caller.
        return {"error": "Internal Server Error", "message": str(e)}
# Create the Gradio interface.
# The single numeric input is forwarded to get_data_with_limit and the
# returned dictionary is rendered as JSON.
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # precision=0 makes Gradio round the value and pass it to the callback
        # as an int; by default a Number component delivers a float, which
        # does not match the callback's integer ``limit`` parameter.
        gr.Number(label="limit", value=50, minimum=1, precision=0)
    ],
    outputs=[
        gr.JSON(label="Data")
    ],
    title="WikiDES Data API",
    description=f"A simple API to access data from the **{DATASET_NAME}** dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
    # Sample limits offered as one-click examples in the UI.
    examples=[
        [10],
        [25],
        [100]
    ],
    allow_flagging="never",
    api_name="data"
)

# Launch the Gradio app
demo.launch()