# Glasses Virtual Try-On: a Gradio app using YuNet face detection and MediaPipe Face Mesh
import cv2
import cvzone
import numpy as np
import os
import gradio as gr
import mediapipe as mp
from datetime import datetime

# Load the YuNet face detection model
model_path = 'face_detection_yunet_2023mar.onnx'
face_detector = cv2.FaceDetectorYN.create(model_path, "", (320, 320))

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)

# Initialize the glasses index and load the first overlay
num = 1
overlay_bgr = cv2.imread(f'glasses/glass{num}.png', cv2.IMREAD_UNCHANGED)
# Split the channels (OpenCV loads PNGs as BGRA)
b, g, r, a = cv2.split(overlay_bgr)
# Merge back in RGBA order to match the RGB frames Gradio delivers
overlay_rgb = cv2.merge((r, g, b, a))
# overlay is consumed by process_frame_3d below
overlay = overlay_rgb
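# A more defensive loader (optional sketch, not wired in above): cv2.imread
# returns None when a file is missing, and the cv2.split call assumes a
# 4-channel BGRA PNG, so a hypothetical helper like this would fail fast
# with a clear error instead of an unpacking crash.
def load_glasses(path):
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError(f"Glasses image not found: {path}")
    if img.ndim != 3 or img.shape[2] != 4:
        raise ValueError(f"Expected a 4-channel PNG with alpha: {path}")
    b, g, r, a = cv2.split(img)
    return cv2.merge((r, g, b, a))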
# Count glasses files
def count_files_in_directory(directory):
    file_count = 0
    for root, dirs, files in os.walk(directory):
        file_count += len(files)
    return file_count
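# Note: os.walk also counts files in sub-folders, so any stray file under
# 'glasses/' would desynchronise the glass{num}.png numbering. A stricter
# count (optional sketch) would match only the expected filenames:
#     import glob
#     len(glob.glob(os.path.join(directory, 'glass*.png')))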
# Determine face shape
def determine_face_shape_3d(landmarks):
    # Calculate 3D distances
    jaw_width = np.linalg.norm(landmarks[0] - landmarks[16])
    face_height = np.linalg.norm(landmarks[8] - landmarks[27])
    # Determine face shape based on 3D proportions
    if jaw_width / face_height > 1.5:
        return "Round"
    elif jaw_width / face_height < 1.2:
        return "Oval"
    else:
        return "Square"
# Recommend glass shape based on face shape
def recommend_glass_shape(face_shape):
    if face_shape == "Round":
        return "Square"
    elif face_shape == "Oval":
        return "Round"
    else:
        return "Square"

directory_path = 'glasses'
total_glass_num = count_files_in_directory(directory_path)
# Change glasses
def change_glasses():
    global num, overlay
    num += 1
    if num > total_glass_num:
        num = 1
    overlay_bgr = cv2.imread(f'glasses/glass{num}.png', cv2.IMREAD_UNCHANGED)
    b, g, r, a = cv2.split(overlay_bgr)
    overlay_rgb = cv2.merge((r, g, b, a))
    # Update the global in place; the click handler below declares no outputs,
    # so nothing is returned
    overlay = overlay_rgb
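# Note: num and overlay are process-wide globals, so on a shared deployment
# every visitor flips everyone's glasses. Per-session state via gr.State is
# the usual fix (sketch only; glasses_state is hypothetical and not wired
# into the UI below):
#     glasses_state = gr.State(value=1)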
def change_lip_color(frame, color_name='none'):
    # Map color names to RGB tuples (Gradio delivers frames in RGB order)
    color_map = {
        'classic_red': (255, 0, 0),    # Classic red
        'deep_red': (139, 0, 0),       # Deep red
        'cherry_red': (205, 0, 0),     # Cherry red
        'rose_red': (204, 102, 0),     # Rose red
        'wine_red': (128, 0, 0),       # Wine red
        'brick_red': (128, 64, 0),     # Brick red
        'coral_red': (255, 128, 0),    # Coral red
        'berry_red': (153, 0, 0),      # Berry red
        'ruby_red': (255, 17, 0),      # Ruby red
        'crimson_red': (220, 20, 60),  # Crimson red
    }
    # Look up the color; 'none' (or an unknown name) leaves the frame untouched
    color = color_map.get(color_name, None)
    if color is None:
        return frame
    # Run Face Mesh on the frame. Note: Gradio frames are already RGB, so this
    # cvtColor actually hands MediaPipe swapped channels; detection still
    # works, but dropping the conversion would be the strict fix.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(frame_rgb)
    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            h, w = frame.shape[:2]

            def to_pixels(indices):
                # Scale normalized Face Mesh landmarks to pixel coordinates
                return np.array([(face_landmarks.landmark[i].x * w,
                                  face_landmarks.landmark[i].y * h) for i in indices], np.int32)

            # Landmark indices outlining the upper lip, lower lip, and the
            # inner-mouth (teeth) contour, each closed back to its start point
            upper_lip_region = to_pixels([61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 61])
            lower_lip_region = to_pixels([61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 61])
            lip_region = np.concatenate((upper_lip_region, lower_lip_region), axis=0)
            teeth_region = to_pixels([78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 78])
            # Create a mask for the lip region
            lip_mask = np.zeros(frame.shape[:2], dtype=np.uint8)
            cv2.fillPoly(lip_mask, [lip_region], 255)
            # Create a mask for the teeth region
            teeth_mask = np.zeros(frame.shape[:2], dtype=np.uint8)
            cv2.fillPoly(teeth_mask, [teeth_region], 255)
            # Subtract the teeth mask from the lip mask
            final_mask = cv2.subtract(lip_mask, teeth_mask)
            # Create a colored lip image
            colored_lips = np.zeros_like(frame)
            colored_lips[:] = color
            # Apply the colored lips only to the lip region
            lips_colored = cv2.bitwise_and(colored_lips, colored_lips, mask=final_mask)
            # Combine the original frame with the colored lips
            frame = cv2.bitwise_and(frame, frame, mask=cv2.bitwise_not(final_mask))
            frame = cv2.add(frame, lips_colored)
    return frame
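# Optional sketch: the replacement above paints lips at full opacity, which
# can look flat. A hypothetical helper like this blends the solid color with
# the original lip texture instead (alpha is the tint strength; lips_colored
# and final_mask are the arrays built in change_lip_color above):
def blend_lip_color(frame, lips_colored, final_mask, alpha=0.6):
    # Weighted mix of the original frame and the solid-color lip image
    tinted = cv2.addWeighted(frame, 1 - alpha, lips_colored, alpha, 0)
    out = frame.copy()
    # Apply the tint only where the lip mask is set
    out[final_mask > 0] = tinted[final_mask > 0]
    return out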
# Process frame for overlay and face shape detection
def process_frame_3d(frame):
    global overlay
    frame = np.array(frame, copy=True)
    height, width = frame.shape[:2]
    face_detector.setInputSize((width, height))
    _, faces = face_detector.detect(frame)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(frame_rgb)
    face_shape = "Unknown"
    glass_shape = "Unknown"
    if faces is not None and results.multi_face_landmarks:
        for face in faces:
            x, y, w, h = face[:4].astype(int)
            face_landmarks = face[4:14].reshape(5, 2).astype(int)
            left_eye_x, left_eye_y = face_landmarks[0].astype(int)
            right_eye_x, right_eye_y = face_landmarks[1].astype(int)
            eye_center_x = (left_eye_x + right_eye_x) // 2
            eye_center_y = (left_eye_y + right_eye_y) // 2
            delta_x = right_eye_x - left_eye_x
            delta_y = right_eye_y - left_eye_y
            angle = np.degrees(np.arctan2(delta_y, delta_x))
            angle = -angle
            overlay_resize = cv2.resize(overlay, (int(w * 1.15), int(h * 0.8)))
            overlay_center = (overlay_resize.shape[1] // 2, overlay_resize.shape[0] // 2)
            rotation_matrix = cv2.getRotationMatrix2D(overlay_center, angle, 1.0)
            overlay_rotated = cv2.warpAffine(
                overlay_resize, rotation_matrix,
                (overlay_resize.shape[1], overlay_resize.shape[0]),
                flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0, 0)
            )
            overlay_x = eye_center_x - overlay_rotated.shape[1] // 2
            overlay_y = eye_center_y - overlay_rotated.shape[0] // 2
            try:
                frame = cvzone.overlayPNG(frame, overlay_rotated, [overlay_x, overlay_y])
            except Exception as e:
                print(f"Error overlaying glasses: {e}")
        for face_landmarks_mp in results.multi_face_landmarks:
            # Convert landmarks to 3D coordinates
            landmarks = np.array([(lm.x * frame.shape[1], lm.y * frame.shape[0], lm.z * frame.shape[1])
                                  for lm in face_landmarks_mp.landmark])
            face_shape = determine_face_shape_3d(landmarks)
            glass_shape = recommend_glass_shape(face_shape)
    return frame, face_shape, glass_shape
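# Optional sketch: cvzone.overlayPNG can fail when the rotated glasses extend
# past the frame border (hence the try/except above); clamping the top-left
# corner first would keep the overlay in range:
#     overlay_x = max(0, min(overlay_x, width - overlay_rotated.shape[1]))
#     overlay_y = max(0, min(overlay_y, height - overlay_rotated.shape[0]))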
# Transform function
def transform_cv2(frame, transform):
    if transform == "cartoon":
        # prepare color
        img_color = cv2.pyrDown(cv2.pyrDown(frame))  # Reduce the resolution
        for _ in range(6):
            img_color = cv2.bilateralFilter(img_color, 9, 9, 7)  # Smooth the image while preserving edges
        img_color = cv2.pyrUp(cv2.pyrUp(img_color))  # Scale back up
        # pyrDown/pyrUp rounding can leave img_color a few pixels off the
        # original size, which would break the bitwise_and below, so match exactly
        img_color = cv2.resize(img_color, (frame.shape[1], frame.shape[0]))
        # prepare edges
        img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
        img_edges = cv2.adaptiveThreshold(
            cv2.medianBlur(img_edges, 7),
            255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY,
            9,
            2,
        )  # Apply adaptive thresholding to get the edges
        img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)  # Convert back to 3 channels
        # combine color and edges
        img = cv2.bitwise_and(img_color, img_edges)
        return img
    elif transform == "edges":
        # perform edge detection
        img = cv2.cvtColor(cv2.Canny(frame, 100, 200), cv2.COLOR_GRAY2BGR)
        return img
    elif transform == "sepia":
        # apply sepia effect
        kernel = np.array([[0.272, 0.534, 0.131],
                           [0.349, 0.686, 0.168],
                           [0.393, 0.769, 0.189]])
        img = cv2.transform(frame, kernel)
        img = np.clip(img, 0, 255)  # ensure values are within byte range
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img_rgb
    elif transform == "negative":
        # apply negative effect
        img = cv2.bitwise_not(frame)
        return img
    elif transform == "sketch":
        # apply pencil-sketch effect: divide grayscale by an inverted blur
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        inv_gray = cv2.bitwise_not(gray)
        blur = cv2.GaussianBlur(inv_gray, (21, 21), 0)
        inv_blur = cv2.bitwise_not(blur)
        img = cv2.divide(gray, inv_blur, scale=256.0)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        return img
    elif transform == "blur":
        # apply blur effect
        img = cv2.GaussianBlur(frame, (15, 15), 0)
        return img
    else:
        return frame
def refresh_interface():
    # Reset the image to an empty state
    # (currently unused; see the commented-out call in save_frame)
    input_img.update(value=None)
    # Return a message indicating the interface has been refreshed
    return "Interface refreshed!"

def save_frame(frame):
    # Gradio delivers RGB; swap channels to BGR so cv2.imwrite stores correct colors
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Create a unique filename using the current timestamp
    filename = f"saved_frame_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
    # Save the frame to the working directory
    cv2.imwrite(filename, frame)
    # # Refresh the interface
    # refresh_interface()
    return filename
def webcam_input(frame, transform, lip_color):
    frame, face_shape, glass_shape = process_frame_3d(frame)
    if transform != "none" and lip_color == "none":
        frame = transform_cv2(frame, transform)
    elif lip_color != "none" and transform == "none":
        frame = change_lip_color(frame, lip_color)
    return frame, face_shape, glass_shape
# Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")) as demo:
    gr.Markdown("<h1 style='text-align: center; font-weight: bold;'>🤓 Glasses Virtual Try-On 🕶️👓</h1>")
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            gr.Markdown("<p style='text-align: left; color: purple;'>🟣You can only apply one filter at a time: either the transform filter or the lip color filter.</p>")
            # Two filters: transform and lip color
            with gr.Row():
                transform = gr.Dropdown(
                    choices=["cartoon", "edges", "sepia", "negative", "sketch", "blur", "none"],
                    value="none", label="Select Filter"
                )
                lip_color = gr.Dropdown(
                    choices=["classic_red", "deep_red", "cherry_red", "rose_red", "wine_red", "brick_red", "coral_red", "berry_red", "ruby_red", "crimson_red", "none"],
                    value="none", label="Select Lip Color"
                )
            gr.Markdown("<p style='text-align: left; font-weight: bold; color: purple;'>🟣Click the Webcam icon to start the camera, then press the record button to start the virtual try-on. If the glasses overlay isn’t showing, try moving further away from the camera.</p>")
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True)
            next_button = gr.Button("Next Glasses➡️")
            gr.Markdown("<p style='text-align: left; color: purple;'>🟣Face Shape and Recommended Glass Shape</p>")
            # Face shape and recommended glass shape
            with gr.Row():
                face_shape_output = gr.Textbox(label="Detected Face Shape")
                glass_shape_output = gr.Textbox(label="Recommended Glass Shape")
            save_button = gr.Button("Save as a Picture📌")
            gr.Markdown("<p style='text-align: left; color: red;'>‼️Warning: Refresh the page after saving the picture to use the virtual try-on again.</p>")
            download_link = gr.File(label="Download Saved Picture")
    # Stream webcam frames through the processing pipeline
    input_img.stream(webcam_input, [input_img, transform, lip_color],
                     [input_img, face_shape_output, glass_shape_output], stream_every=0.1)
    next_button.click(change_glasses, [], [])
    save_button.click(save_frame, [input_img], [download_link])
    gr.Markdown("**Reminder:** All glasses images are screenshots from Goodr, segmented using glass_segmentation_helper.py, and then manually saved to the “glasses” folder for the try-on feature.")

if __name__ == "__main__":
    demo.launch(share=True)