Spaces:

Mqleet
/

AutoPage

Running

App Files Files Community

AutoPage / utils /critic_utils.py

Mqleet

upd code

fcaa164 29 days ago

raw

history blame

5.45 kB

	from PIL import Image
	import io
	import json

	def crop_image(image, x: float, y: float, width: float, height: float):
	    """Crop the image based on normalized coordinates and return the crop.

	    All four box values are fractions of the image size (0.0 - 1.0), so the
	    same box works for any resolution. This has the effect of zooming in on
	    the selected region.

	    The upper-left corner is clamped to [0, 1] first; the lower-right corner
	    is then computed from the *clamped* corner plus the box size and clamped
	    to [0, 1] as well.

	    Args:
	        image (PIL.Image.Image): the input image
	        x (float): the horizontal coordinate of the upper-left corner of the box
	        y (float): the vertical coordinate of that corner
	        width (float): the box width
	        height (float): the box height

	    Returns:
	        PIL.Image.Image: the cropped image, re-encoded as JPEG in memory and
	        reopened from that buffer.

	    Example:
	        image = Image.open("sample_img.jpg")
	        cropped_img = crop_image(image, 0.2, 0.3, 0.5, 0.4)
	        display(cropped_img)
	    """
	    # Pixel dimensions used to scale the normalized box.
	    img_w, img_h = image.size

	    # Limit the box to the image area.
	    x = min(max(0, x), 1)
	    y = min(max(0, y), 1)
	    x2 = min(max(0, x + width), 1)
	    y2 = min(max(0, y + height), 1)

	    # Scale normalized coordinates to pixels: (left, upper, right, lower).
	    cropped_img = image.crop((x * img_w, y * img_h, x2 * img_w, y2 * img_h))

	    # JPEG cannot store alpha or palette data; saving such modes (RGBA, P,
	    # LA, ... - common for PNG input) raises an error, so fall back to RGB.
	    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
	    if cropped_img.mode not in jpeg_modes:
	        cropped_img = cropped_img.convert("RGB")

	    buffer = io.BytesIO()
	    cropped_img.save(buffer, format="JPEG")
	    buffer.seek(0)  # Reset buffer position so the image can be read back

	    # Reopen from the buffer so the result is a JPEG-backed image.
	    return Image.open(buffer)


	def zoom_in_image_by_bbox(image, box, padding=0.01):
	    """A simple wrapper function to crop the image based on a bounding box.

	    The box is grown by ``padding`` on every side before cropping, so the
	    crop starts ``padding`` earlier and is ``2 * padding`` larger in each
	    dimension. The padding cannot be too small: the minimum is 0.01.

	    Args:
	        image (PIL.Image.Image): the input image
	        box (List[float]): the bounding box in the format of [x, y, w, h]
	        padding (float, optional): The padding for the image crop, outside
	            of the bounding box. Defaults to 0.01.

	    Returns:
	        cropped_img (PIL.Image.Image): the cropped image

	    Raises:
	        AssertionError: if ``padding`` is smaller than 0.01.

	    Example:
	        image = Image.open("sample_img.jpg")
	        annotated_img, boxes = detection(image, "bus")
	        cropped_img = zoom_in_image_by_bbox(image, boxes[0], padding=0.1)
	        display(cropped_img)
	    """
	    assert padding >= 0.01, "The padding should be at least 0.01"
	    x, y, w, h = box
	    # Shift the corner out by the padding and widen the box on both sides.
	    x, y, w, h = x - padding, y - padding, w + 2 * padding, h + 2 * padding
	    return crop_image(image, x, y, w, h)


	def parse_inch_string(inch_str: str) -> float:
	    """Parse a measurement such as '12.0 Inches' and return its number (12.0)."""
	    # Drop the unit text, then trim whatever whitespace is left around the number.
	    number_text = inch_str.replace(" Inches", "")
	    number_text = number_text.strip()
	    return float(number_text)

	def convert_pptx_bboxes_to_image_space(bbox_dict, slide_width_in, slide_height_in):
	    """
	    Normalize PPTX bounding boxes, given in inches, by the slide dimensions.

	    Expected shape of ``bbox_dict``:
	        {
	            'TitleAndAuthor': {
	                'left': '12.0 Inches', 'top': '1.0 Inches',
	                'width': '24.0 Inches', 'height': '2.0 Inches'
	            },
	            ...
	        }

	    Horizontal values (left, width) are divided by ``slide_width_in`` and
	    vertical values (top, height) by ``slide_height_in``.

	    Returns a dictionary with the same keys, but values as
	    [x_norm, y_norm, w_norm, h_norm].
	    """

	    def _normalize(box):
	        # Parse the four fields in a fixed order: left, top, width, height.
	        left, top, width, height = (
	            parse_inch_string(box[field])
	            for field in ('left', 'top', 'width', 'height')
	        )
	        return [
	            left / slide_width_in,
	            top / slide_height_in,
	            width / slide_width_in,
	            height / slide_height_in,
	        ]

	    return {label: _normalize(box) for label, box in bbox_dict.items()}

	def convert_pptx_bboxes_json_to_image_json(bbox_json_str, slide_width_in, slide_height_in):
	    """
	    Convert bounding boxes (in inches) to normalized image coords.

	    Args:
	        bbox_json_str (str | bytes | dict): JSON text of the bounding box
	            dictionary, or the already-parsed dictionary itself.
	            Example of the structure (in JSON):
	            {
	                "TitleAndAuthor": {
	                    "left": "12.0 Inches",
	                    "top": "1.0 Inches",
	                    "width": "24.0 Inches",
	                    "height": "2.0 Inches"
	                },
	                "Abstract-Section Title": { ... },
	                ...
	            }
	        slide_width_in (float): The total slide width in inches.
	        slide_height_in (float): The total slide height in inches.

	    Returns:
	        dict: A dictionary (NOT a JSON string, despite the function name)
	        where each key maps to [x_norm, y_norm, w_norm, h_norm].
	    """

	    def _inches(inch_str: str) -> float:
	        """Helper to parse '12.0 Inches' -> 12.0 (float)."""
	        return float(inch_str.replace(" Inches", "").strip())

	    # 1) Parse JSON text into a Python dict; anything else is used as-is.
	    if isinstance(bbox_json_str, (str, bytes, bytearray)):
	        bbox_dict = json.loads(bbox_json_str)
	    else:
	        bbox_dict = bbox_json_str

	    # 2) Convert each bounding box to normalized coordinates [x, y, w, h].
	    normalized_bboxes = {}
	    for label, box in bbox_dict.items():
	        left_in = _inches(box['left'])
	        top_in = _inches(box['top'])
	        width_in = _inches(box['width'])
	        height_in = _inches(box['height'])

	        normalized_bboxes[label] = [
	            left_in / slide_width_in,
	            top_in / slide_height_in,
	            width_in / slide_width_in,
	            height_in / slide_height_in,
	        ]

	    # 3) Return the dictionary itself; callers receive a dict, not JSON text.
	    return normalized_bboxes