Spaces:
Runtime error
import gradio as gr
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
import torch
import pandas as pd
import pytesseract
import cv2
from PIL import Image

# Set Tesseract command (only works if Tesseract is already installed on the hosting server,
# e.g. via packages.txt on a Space)
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
# Initialize the model and processor from Hugging Face Hub
# Note: this is an AWQ-quantized checkpoint; AWQ kernels generally expect a CUDA GPU,
# so loading/generating on a CPU-only Space is likely to fail.
model_name = "Qwen/Qwen2-VL-2B-Instruct-AWQ"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype="auto"
)
model.to("cpu")
processor = AutoProcessor.from_pretrained(model_name)
# Preprocessing image for OCR
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    return binary

# OCR-based text extraction
def ocr_extract_text(image_path):
    preprocessed_image = preprocess_image(image_path)
    return pytesseract.image_to_string(preprocessed_image)
# Model-based image processing
def process_image(image_path):
    try:
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": (
                    "Extract the following details from the invoice:\n"
                    "- 'invoice_number'\n"
                    "- 'date'\n"
                    "- 'place'\n"
                    "- 'amount' (monetary value in the relevant currency)\n"
                    "- 'category' (based on the invoice type)"
                )}
            ]
        }]
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Pass the image to the processor as well; with text alone, the model never sees the invoice
        image = Image.open(image_path).convert("RGB")
        inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt")
        inputs = inputs.to(model.device)
        generated_ids = model.generate(**inputs, max_new_tokens=128)
        # Strip the prompt tokens so only the newly generated answer is decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        return parse_details(output_text[0])
    except Exception as e:
        print(f"Model failed, falling back to OCR: {e}")
        ocr_text = ocr_extract_text(image_path)
        return parse_details(ocr_text)
# Parsing details from text
def parse_details(details):
    parsed_data = {
        "Invoice Number": None,
        "Date": None,
        "Place": None,
        "Amount": None,
        "Category": None
    }
    lines = details.split("\n")
    for line in lines:
        lower_line = line.lower()
        if "invoice" in lower_line:
            parsed_data["Invoice Number"] = line.split(":")[-1].strip()
        elif "date" in lower_line:
            parsed_data["Date"] = line.split(":")[-1].strip()
        elif "place" in lower_line:
            parsed_data["Place"] = line.split(":")[-1].strip()
        elif any(keyword in lower_line for keyword in ["total", "amount", "cost"]):
            parsed_data["Amount"] = line.split(":")[-1].strip()
        elif "category" in lower_line:
            parsed_data["Category"] = line.split(":")[-1].strip()
    # Fall back to a generic category when none was extracted
    if parsed_data["Category"] is None:
        parsed_data["Category"] = "General"
    return parsed_data
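# Illustrative only: a hypothetical model/OCR output and what parse_details returns for it
# (the values below are made up, not taken from a real invoice):
#   details = "Invoice Number: INV-0042\nDate: 2024-05-01\nPlace: Berlin\nTotal Amount: 120.50 EUR"
#   parse_details(details) ->
#   {"Invoice Number": "INV-0042", "Date": "2024-05-01", "Place": "Berlin",
#    "Amount": "120.50 EUR", "Category": "General"}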
# Gradio Interface
def gradio_interface(image_files):
    results = []
    for image_file in image_files:
        # gr.File may hand back plain file paths or tempfile-like objects depending on the Gradio version
        path = getattr(image_file, "name", image_file)
        results.append(process_image(path))
    df = pd.DataFrame(results)
    return df
# Launch Gradio App
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload Invoice Images", file_types=["image"], file_count="multiple"),
    outputs=gr.Dataframe(interactive=True),
    title="Invoice Extraction System"
)

if __name__ == "__main__":
    interface.launch(share=True)
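For the script to run on a Space, Tesseract and the Python packages it imports have to be installed at build time. A minimal sketch of the two dependency files a Space reads, assuming unpinned versions (the exact contents are an assumption, not taken from the original repo):

packages.txt (apt packages):
tesseract-ocr

requirements.txt (pip packages):
gradio
transformers
torch
pandas
pytesseract
opencv-python-headless
pillow
accelerate
autoawq  # only needed for the AWQ checkpoint; its kernels expect a CUDA GPU

opencv-python-headless is used here instead of opencv-python so that no extra system OpenGL libraries are needed on a headless server.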