Update utility/utils.py

utility/utils.py  CHANGED  (+34 -36)

@@ -32,15 +32,16 @@ def draw_boxes(image, bounds, color='red', width=2):
         draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
     return image
 
-#Image Quality upscaling
 # Load image using OpenCV
 def load_image(image_path):
-
+    image = cv2.imread(image_path)
+    if image is None:
+        raise ValueError(f"Could not load image from {image_path}. It may be corrupted or the path is incorrect.")
+    return image
 
-# Function for upscaling image using OpenCV's INTER_CUBIC
+# Function for upscaling image using OpenCV's INTER_CUBIC
 def upscale_image(image, scale=2):
     height, width = image.shape[:2]
-    # Simple upscaling using cubic interpolation
     upscaled_image = cv2.resize(image, (width * scale, height * scale), interpolation=cv2.INTER_CUBIC)
     return upscaled_image
 
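cv2.imread() signals failure by returning None rather than raising, so the guard added to load_image turns a bad path or unreadable file into an explicit ValueError instead of a later AttributeError inside upscale_image. A minimal sketch of the caller-side effect, assuming utility/ is importable as a package and using a hypothetical file name:

from utility.utils import load_image, upscale_image

try:
    img = load_image("does_not_exist.jpg")  # hypothetical path
    big = upscale_image(img, scale=2)       # doubles both dimensions via INTER_CUBIC
    print(big.shape)                        # (2*H, 2*W, 3) for a colour image
except ValueError as err:
    # Raised by the new guard; previously cv2.imread returned None silently
    print(err)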
@@ -58,11 +59,9 @@ def sharpen_image(image):
 
 # Function to increase contrast and enhance details without changing color
 def enhance_image(image):
-    # Convert from BGR to RGB for PIL processing, then back to BGR
     pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
     enhancer = ImageEnhance.Contrast(pil_img)
     enhanced_image = enhancer.enhance(1.5)
-    # Convert back to BGR
     enhanced_image_bgr = cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
     return enhanced_image_bgr
 
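enhance_image round-trips through PIL because ImageEnhance works on PIL images, not OpenCV arrays; a Contrast factor of 1.0 is a no-op and 1.5 is a moderate boost, and the colour conversions are needed because OpenCV stores channels as BGR while PIL expects RGB. A self-contained sketch of the same round-trip on a synthetic array:

import cv2
import numpy as np
from PIL import Image, ImageEnhance

# Synthetic BGR image (OpenCV channel order) standing in for a real photo
bgr = np.zeros((64, 64, 3), dtype=np.uint8)
bgr[:, :32] = (40, 80, 120)   # colour the left half so the contrast change is visible

pil_img = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
boosted = ImageEnhance.Contrast(pil_img).enhance(1.5)          # 1.0 would leave it unchanged
bgr_out = cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)   # back to OpenCV's BGR order

print(bgr_out.shape, bgr_out.dtype)  # (64, 64, 3) uint8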
@@ -70,59 +69,58 @@ def enhance_image(image):
 def process_image(image_path, scale=2):
     # Load the image
     image = load_image(image_path)
-
+
     # Upscale the image
     upscaled_image = upscale_image(image, scale)
-
+
     # Reduce noise
     denoised_image = reduce_noise(upscaled_image)
-
+
     # Sharpen the image
     sharpened_image = sharpen_image(denoised_image)
-
+
     # Enhance the image contrast and details without changing color
     final_image = enhance_image(sharpened_image)
-
-    return final_image
 
+    return final_image
 
 def ocr_with_paddle(img):
     finaltext = ''
     model_dir = os.getenv('PADDLEOCR_MODEL_DIR', '/tmp/.paddleocr')
     ocr = PaddleOCR(lang='en', use_angle_cls=True, det_model_dir=model_dir)
-    # img_path = 'exp.jpeg'
     result = ocr.ocr(img)
-
+
     for i in range(len(result[0])):
         text = result[0][i][1][0]
-        finaltext += ' '+ text
+        finaltext += ' ' + text
     return finaltext
 
 def extract_text_from_images(image_paths, RESULT_FOLDER):
     all_extracted_texts = {}
-    all_extracted_imgs={}
+    all_extracted_imgs = {}
     for image_path in image_paths:
-
-
-
-        # Draw boxes on the processed image
-        img_result = Image.fromarray(enhanced_image)
-        #draw_boxes(img_result, bounds)
-
-        result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
-        img_result.save(result_image_path) # Save the processed image
-
-        # Perform OCR on the enhanced image
-        result=ocr_with_paddle(enhanced_image)
-        # results = reader.readtext(enhanced_image)
-        # extracted_text = " ".join([res[1] for res in results])
-
-        all_extracted_texts[image_path] =result
-        all_extracted_imgs[image_path] = result_image_path
-    # Convert to JSON-compatible structure
-    all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
-    return all_extracted_texts,all_extracted_imgs_json
+        try:
+            # Enhance the image before OCR
+            enhanced_image = process_image(image_path, scale=2)
+
+            # Draw boxes on the processed image (optional, requires bounds)
+            img_result = Image.fromarray(enhanced_image)
+
+            result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
+            img_result.save(result_image_path) # Save the processed image
+
+            # Perform OCR on the enhanced image
+            result = ocr_with_paddle(enhanced_image)
+
+            all_extracted_texts[image_path] = result
+            all_extracted_imgs[image_path] = result_image_path
+        except ValueError as ve:
+            print(f"Error processing image {image_path}: {ve}")
+            continue # Continue to the next image if there's an error
+
+    # Convert to JSON-compatible structure
+    all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
+    return all_extracted_texts, all_extracted_imgs_json
 
 # Function to call the Gemma model and process the output as Json
 def Data_Extractor(data, client=client):
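process_image chains the helpers in a fixed order (load, upscale, denoise, sharpen, contrast boost), and with load_image now raising on bad input, any failure surfaces as a ValueError at the call site, which is exactly what the new try/except in extract_text_from_images relies on. A usage sketch under the same assumptions as above (importable utility package, hypothetical file names):

import cv2
from utility.utils import process_image

try:
    enhanced = process_image("sample_receipt.jpg", scale=2)  # hypothetical input file
    cv2.imwrite("sample_receipt_enhanced.png", enhanced)     # result is still a BGR numpy array
except ValueError as err:
    print(f"Skipping file: {err}")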
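ocr_with_paddle flattens PaddleOCR's nested result: result[0] holds the detections for the single image passed to ocr.ocr(), and each detection pairs the bounding-box points with (recognised text, confidence), so result[0][i][1][0] is the i-th recognised string. The same loop written with unpacking, plus a guard for the empty-page case (some PaddleOCR versions return None instead of an empty list when nothing is detected), is sketched below:

def collect_text(result):
    # result[0]: detections for the first (and only) image given to ocr.ocr()
    # each detection: [box_points, (recognised_text, confidence)]
    detections = result[0] or []  # guard: some versions return None when no text is found
    return ' '.join(text for _box, (text, _conf) in detections)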
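With the new try/except, one unreadable file is logged and skipped instead of aborting the whole batch, and only successfully processed images appear in the two returned dicts (extracted text keyed by input path, saved result images keyed by the same path). A hypothetical call, creating RESULT_FOLDER up front since PIL's Image.save() does not create missing directories:

import os
from utility.utils import extract_text_from_images  # assumes utility/ is importable

RESULT_FOLDER = "results"                      # hypothetical output directory
os.makedirs(RESULT_FOLDER, exist_ok=True)

image_paths = ["invoice1.jpg", "missing.jpg"]  # hypothetical inputs; the bad one is skipped
texts, result_imgs = extract_text_from_images(image_paths, RESULT_FOLDER)

for path, text in texts.items():
    print(path, "->", result_imgs[path])
    print(text)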