import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Predefined example CSV content
EXAMPLE_CSV_CONTENT = """
"Loss","Date","Score","Opponent","Record","Attendance"
"Hampton (14–12)","September 25","8–7","Padres","67–84","31,193"
"Speier (5–3)","September 26","3–1","Padres","67–85","30,711"
"Elarton (4–9)","September 22","3–1","@ Expos","65–83","9,707"
"Lundquist (0–1)","September 24","15–11","Padres","67–83","30,774"
"Hampton (13–11)","September 6","9–5","Dodgers","61–78","31,407"
"""
# Load the model and tokenizer once, caching them across Streamlit reruns so
# the 7B checkpoint is not reloaded on every interaction
@st.cache_resource
def load_model_and_tokenizer():
    model_name = "tablegpt/TableGPT2-7B"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()
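# Note: device_map="auto" requires the accelerate package, and a 7B model in
# half precision needs roughly 16 GB of accelerator memory; on CPU-only
# hardware loading will be very slow or may run out of memory.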
# Application UI
st.title("Table Question Answering App")
st.write(
    """
    This app uses a language model to answer questions about tabular data.
    You can upload your own CSV file or use a predefined example to test it.
    """
)

# Sidebar for input options
st.sidebar.header("Input Options")
data_source = st.sidebar.radio("Choose a data source:", ("Example CSV", "Upload CSV"))
if data_source == "Example CSV":
    st.subheader("Using Example CSV Data")
    csv_file = StringIO(EXAMPLE_CSV_CONTENT)
    df = pd.read_csv(csv_file)
else:
    st.subheader("Upload Your CSV File")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
    else:
        st.warning("Please upload a CSV file to proceed.")
        st.stop()
# Display the loaded dataframe
st.write("### Data Preview")
st.dataframe(df)

# Question input; the default question is Chinese for "Which games reached a
# record of 40 wins and 40 losses?"
st.write("### Ask a Question")
question = st.text_input("Enter your question:", "哪些比赛的战绩达到了40胜40负？")
# Generate a response once a question is provided
if question:
    # Prompt format expected by TableGPT2: the dataframe is summarized via
    # df.head(5).to_string(index=False) inside a /* ... */ block
    example_prompt_template = """Given access to several pandas dataframes, write the Python code to answer the user's question.

/*
"{var_name}.head(5).to_string(index=False)" as follows:
{df_info}
*/

Question: {user_question}
"""
    prompt = example_prompt_template.format(
        var_name="df",
        df_info=df.head(5).to_string(index=False),
        user_question=question,
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with st.spinner("Generating response..."):
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Keep only the newly generated tokens, dropping the echoed prompt
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Display the model's response
    st.write("### Model Response")
    st.text_area("Response", response, height=200)
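    # TableGPT2 typically answers with a Python snippet (often wrapped in
    # ```python fences) rather than a direct answer. A hypothetical extra view
    # that renders any fenced code with syntax highlighting:
    if "```python" in response:
        code_part = response.split("```python")[-1].split("```")[0]
        st.code(code_part, language="python")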
# Footer
st.sidebar.info(
    """
    This app demonstrates the use of a language model for tabular data understanding.
    Powered by [Hugging Face Transformers](https://huggingface.co/).
    """
)
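# A minimal sketch for running the app locally, assuming this file is saved as
# app.py (package names are the usual ones; versions are not pinned):
#
#     pip install streamlit pandas torch transformers accelerate
#     streamlit run app.py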