Spaces:

timtjoe
/

wikides

Sleeping

wikides / app.py

remove data persist feature from app.py

2ef947c verified 3 months ago

1.78 kB

	import gradio as gr
	from datasets import load_dataset, Dataset

	# Dataset identifier passed as the first argument to load_dataset()
	# (presumably a Hugging Face Hub repo id -- verify).
	DATASET_NAME = "Thang/wikides"
	# Name of the split requested from load_dataset() via its `split` argument.
	SPLIT = "train"

	# Function to fetch data with a limit on demand
	def get_data_with_limit(limit: int = 50):
	    """Return the first ``limit`` rows of the dataset as a dictionary.

	    The dataset is loaded with ``load_dataset(DATASET_NAME, split=SPLIT)`` on
	    every call; nothing is kept in memory between calls.

	    Args:
	        limit: Maximum number of rows to return. Floats are truncated to an
	            integer, because numeric UI inputs may arrive as floats such as
	            ``10.0``. Any value that is not a number, is NaN/infinite, or is
	            smaller than 1 falls back to the default of 50. Values larger
	            than the dataset are capped at the dataset size.

	    Returns:
	        The result of ``Dataset.to_dict()`` for the selected rows, or, if
	        anything goes wrong, a dictionary with the keys ``"error"`` and
	        ``"message"`` describing the failure.
	    """
	    default_limit = 50

	    # gr.Number hands the callback a float unless the component is created
	    # with precision=0. Rejecting floats outright would silently ignore the
	    # caller's limit, so convert them to int before validating.
	    if isinstance(limit, float):
	        try:
	            limit = int(limit)
	        except (OverflowError, ValueError):
	            # int() raises OverflowError for +/-inf and ValueError for NaN.
	            limit = default_limit

	    # Type and value checking for the limit parameter
	    if not isinstance(limit, int) or limit < 1:
	        limit = default_limit

	    try:
	        # Load the dataset on demand for this request.
	        # NOTE(review): whether this re-downloads or reuses an on-disk cache
	        # depends on the `datasets` library configuration -- confirm.
	        dataset = load_dataset(DATASET_NAME, split=SPLIT)

	        # Ensure the limit doesn't exceed the dataset size
	        limit = min(limit, len(dataset))

	        # Take the first `limit` rows and convert them to a dictionary
	        return dataset.select(range(limit)).to_dict()
	    except Exception as e:
	        # Boundary of the request handler: return a structured error
	        # response instead of letting the exception propagate to the UI.
	        return {"error": "Internal Server Error", "message": str(e)}

	# Create the Gradio interface that exposes get_data_with_limit both as a
	# web form and as the "data" API endpoint.
	demo = gr.Interface(
	    fn=get_data_with_limit,
	    inputs=[
	        # precision=0 makes Gradio round the entered value and pass it to the
	        # handler as an int rather than a float, which is what a row count
	        # needs to be.
	        gr.Number(label="limit", value=50, minimum=1, precision=0),
	    ],
	    outputs=[
	        gr.JSON(label="Data"),
	    ],
	    title="WikiDES Data API",
	    description=f"A simple API to access data from the {DATASET_NAME} dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
	    examples=[
	        [10],
	        [25],
	        [100],
	    ],
	    allow_flagging="never",
	    api_name="data",
	)

	# Launch the Gradio app
	demo.launch()