Spaces:
Runtime error
Runtime error
Update utility/utils.py
Browse files- utility/utils.py +23 -1
utility/utils.py
CHANGED
|
@@ -400,9 +400,25 @@ def process_extracted_text(extracted_text):
|
|
| 400 |
|
| 401 |
return combined_results_json
|
| 402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
# Process the model output for parsed result
|
| 404 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
| 405 |
|
|
|
|
|
|
|
| 406 |
# Removing duplicate emails
|
| 407 |
unique_emails = []
|
| 408 |
for email in cont_data['emails']:
|
|
@@ -421,7 +437,13 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
|
|
| 421 |
for num in cont_data['phone_numbers']:
|
| 422 |
if num[-10:] not in normalized_contact:
|
| 423 |
unique_numbers.append(num)
|
| 424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
# Add unique emails, links, and phone numbers to the original LLMdata
|
| 426 |
LLMdata['Email'] += unique_emails
|
| 427 |
LLMdata['Link'] += unique_links
|
|
|
|
| 400 |
|
| 401 |
return combined_results_json
|
| 402 |
|
| 403 |
+
# Function to remove duplicates (case-insensitive) from each list in the dictionary
|
| 404 |
+
def remove_duplicates_case_insensitive(data_dict):
    """Drop case-insensitive duplicate strings from every list in *data_dict*.

    The dictionary is updated in place and also returned, so callers may
    either ignore the result or rebind it
    (``data = remove_duplicates_case_insensitive(data)``).

    For each list value, the first occurrence of a string is kept with its
    original casing; later strings that are equal when lower-cased are
    removed. Values that are not lists are left untouched, and list items
    that are not strings are kept as-is (they have no case to compare).

    Args:
        data_dict: Mapping whose list values should be de-duplicated.

    Returns:
        The same ``data_dict`` object, after de-duplication.
    """
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for key, value_list in data_dict.items():
        if not isinstance(value_list, list):
            # Iterating a plain string would split it into characters.
            continue

        seen = set()
        unique_list = []

        for item in value_list:
            if not isinstance(item, str):
                # Non-string entries cannot be lower-cased; keep them.
                unique_list.append(item)
                continue

            lowered = item.lower()
            if lowered not in seen:
                unique_list.append(item)  # Preserve the original casing
                seen.add(lowered)  # Track the lower-cased version

        # Update the dictionary with unique values
        data_dict[key] = unique_list

    # Returning the dict is required by callers that rebind the result.
    return data_dict
|
| 416 |
+
|
| 417 |
# Process the model output for parsed result
|
| 418 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
| 419 |
|
| 420 |
+
# Apply the function to the data
|
| 421 |
+
LLMdata=remove_duplicates_case_insensitive(LLMdata)
|
| 422 |
# Removing duplicate emails
|
| 423 |
unique_emails = []
|
| 424 |
for email in cont_data['emails']:
|
|
|
|
| 437 |
for num in cont_data['phone_numbers']:
|
| 438 |
if num[-10:] not in normalized_contact:
|
| 439 |
unique_numbers.append(num)
|
| 440 |
+
|
| 441 |
+
# Removing duplicate emails (case-insensitive comparison)
|
| 442 |
+
unique_emails = []
|
| 443 |
+
for email in cont_data['emails']:
|
| 444 |
+
if email.lower() not in LLMdata['Email'].lower():
|
| 445 |
+
unique_emails.append(email)
|
| 446 |
+
|
| 447 |
# Add unique emails, links, and phone numbers to the original LLMdata
|
| 448 |
LLMdata['Email'] += unique_emails
|
| 449 |
LLMdata['Link'] += unique_links
|