Spaces:
Sleeping
Sleeping
| import re | |
def check_integer(string: str) -> bool:
    """Return True if *string* contains at least one digit character.

    Despite the name, this does not test whether the whole string is an
    integer: a single digit anywhere in the string is enough. The empty
    string yields False.
    """
    # An all-digit string trivially contains a digit, so no separate
    # ``string.isdigit()`` pre-check is needed.
    return any(char.isdigit() for char in string)
def check_alpha(string):
    """Return True when *string* holds only ASCII letters and spaces.

    Any other character (digits, punctuation, accented or non-Latin
    letters) makes the result False. An empty string is accepted.
    """
    def _is_letter_or_space(symbol):
        return symbol == ' ' or 'a' <= symbol <= 'z' or 'A' <= symbol <= 'Z'

    return all(_is_letter_or_space(symbol) for symbol in string)
def is_chinese_name(text: str) -> bool:
    """Heuristically decide whether *text* looks like a name line.

    The test only inspects the first eight characters of *text*. It returns
    True when that prefix contains at least three ASCII upper-case letters,
    at least two ASCII lower-case letters and at least one space, e.g.
    ``'TAMKing Man'``. Texts longer than 40 characters are always rejected.

    NOTE(review): the thresholds presumably target romanised names whose
    surname is written in capitals -- confirm against the caller.
    """
    if len(text) > 40:
        return False

    # The counts can only grow as a prefix gets longer, so checking the
    # longest prefix (up to 8 characters) is equivalent to checking every
    # prefix of length 1..8 in turn.
    head = text[:8]
    upper_count = sum('A' <= char <= 'Z' for char in head)
    lower_count = sum('a' <= char <= 'z' for char in head)
    space_count = head.count(' ')
    return upper_count >= 3 and lower_count >= 2 and space_count >= 1
def seperate_name(text: str) -> str:
    """Return *text* with every space removed, converted to lower case.

    Only the space character ``' '`` is removed; other whitespace such as
    tabs or newlines is left in place.
    """
    return text.replace(' ', '').lower()
def validate_hkid(hkid: str) -> bool:  # omit parentheses
    """Check the format and check digit of an HKID number.

    Parentheses are ignored, so ``'A123456(3)'`` and ``'A1234563'`` are
    treated alike. After removing them the value must consist of one or two
    upper-case letters, six digits and a final check character (``0``-``9``
    or ``A``), with nothing before or after.

    Each of the nine positions is weighted 9, 8, ..., 1. Digits count as
    their value, letters as ``A`` = 10 ... ``Z`` = 35, and a missing first
    letter as 36. The number is valid when the weighted sum is divisible
    by 11.

    Returns:
        True if *hkid* is well formed and its check character is correct,
        False otherwise.
    """
    hkid = hkid.replace('(', '').replace(')', '')
    # fullmatch, unlike match() with a trailing '$', does not tolerate a
    # newline after the check character.
    match = re.fullmatch(r'([A-Z])?([A-Z])([0-9]{6})([0-9A])', hkid)
    if match is None:
        return False

    prefix, letter, digits, check = match.groups()
    symbols = [prefix, letter, *digits, check]
    weights = range(9, 0, -1)
    # int(symbol, 36) maps '0'-'9' to 0-9 and 'A'-'Z' to 10-35.
    total = sum(
        weight * (36 if symbol is None else int(symbol, 36))
        for symbol, weight in zip(symbols, weights)
    )
    return total % 11 == 0
def format_HKID(hkid: str) -> str:
    """Return *hkid* in canonical form with the check character bracketed.

    Existing parentheses are dropped first, then the value is rebuilt as
    ``<letters><six digits>(<check>)``, e.g. ``'A1234563'`` becomes
    ``'A123456(3)'`` and ``'AB9876543'`` becomes ``'AB987654(3)'``.

    Raises:
        ValueError: if *hkid*, without parentheses, is not one or two
            upper-case letters, six digits and a check character
            (``0``-``9`` or ``A``).
    """
    hkid = hkid.replace('(', '').replace(')', '')
    match = re.match(r'^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', hkid)
    if match is None:
        raise ValueError(f'not a well-formed HKID: {hkid!r}')

    # Build the result from the captured groups rather than from character
    # positions, so one- and two-letter prefixes are handled the same way.
    prefix, letter, digits, check = match.groups()
    return f"{prefix or ''}{letter}{digits}({check})"
def format_issuedate(issuedate):
    """Strip every '(', ')' and 'C' character from *issuedate*.

    All occurrences are removed, wherever they appear in the string; every
    other character is kept in its original order.
    """
    unwanted = str.maketrans('', '', '()C')
    return issuedate.translate(unwanted)
def is_string_integer(string):
    """Report whether ``int(string)`` succeeds.

    This accepts everything ``int()`` accepts, which is more than plain
    digits: a leading sign, surrounding whitespace and underscores between
    digits all parse. Only ``ValueError`` is handled; any other exception
    raised by ``int()`` propagates to the caller.
    """
    try:
        int(string)
    except ValueError:
        # Not parseable as a base-10 integer literal.
        return False
    else:
        return True
def check_issuedate(text: str) -> bool:
    """Return True if *text* is an issue date of the form ``NN-NN``.

    The date may be decorated with a leading ``(`` or ``C`` and/or a
    trailing ``)``, e.g. ``'06-96'``, ``'(06-96)'`` or ``'C06-96'``. Once
    the decoration is removed, exactly two ASCII digits, a hyphen and two
    more ASCII digits must remain. The numeric values themselves are not
    range-checked.
    """
    # Five characters for the date itself plus at most one leading and one
    # trailing decoration character.
    if len(text) < 5 or len(text) > 7:
        return False

    if text.startswith('('):
        text = text.replace('(', '')
    elif text.startswith('C'):
        text = text.replace('C', '')
    if text.endswith(')'):
        text = text.replace(')', '')

    if len(text) != 5 or text[2] != '-':
        return False

    # Test the four digit positions explicitly. Parsing with int() would
    # also accept signs, underscores, whitespace and stray hyphens.
    return all(char in '0123456789' for char in text[:2] + text[3:])
def print_info(name, valid_hkid, hkid, issuedate):
    """Print the extracted fields to standard output, one per line.

    Three lines are written: the name, the HKID together with its validity
    flag, and the date of issue. Note that *valid_hkid* comes before *hkid*
    in the parameter list but after it in the printed line.
    """
    report_lines = (
        f'Name: {name}',
        f'HKID: {hkid} and validity: {valid_hkid}',
        f'Date of issue: {issuedate}',
    )
    for line in report_lines:
        print(line)
def is_comma_present(string):
    """Return True if *string* contains at least one comma."""
    comma = ','
    return comma in string
def longest_common_subsequence(s1, s2):
    """Return one longest common subsequence of *s1* and *s2* as a string.

    A table of subsequence lengths is filled for every pair of prefixes,
    then walked backwards from the bottom-right corner to recover the
    characters. When the two neighbouring cells tie during the walk, the
    walk moves left (towards a shorter prefix of *s2*).
    """
    rows, cols = len(s1), len(s2)

    # lengths[r][c] is the LCS length of s1[:r] and s2[:c].
    lengths = [[0 for _ in range(cols + 1)] for _ in range(rows + 1)]
    for r, left in enumerate(s1, start=1):
        for c, right in enumerate(s2, start=1):
            if left == right:
                lengths[r][c] = lengths[r - 1][c - 1] + 1
            else:
                lengths[r][c] = max(lengths[r - 1][c], lengths[r][c - 1])

    # Walk back from the corner, collecting matched characters last-first.
    collected = []
    r, c = rows, cols
    while r and c:
        if s1[r - 1] == s2[c - 1]:
            collected.append(s1[r - 1])
            r, c = r - 1, c - 1
        elif lengths[r - 1][c] > lengths[r][c - 1]:
            r -= 1
        else:
            c -= 1

    return ''.join(reversed(collected))
def combine_info(info1, info2):
    """Merge two extraction results into one.

    Each argument is a sequence ``[name, validity, hkid, issuedate]`` where
    *validity* is the string ``'True'`` or ``'False'``. The result always
    has the same four positions, so it can be passed on as
    ``print_info(*result)``.

    * name: the shared value if both agree; the non-empty one if the other
      is empty; otherwise their longest common subsequence.
    * validity / hkid: if exactly one input is marked ``'True'``, its pair
      is used. If both are ``'True'`` and the HKIDs agree, that pair is
      used. If both are ``'True'`` but the HKIDs differ, the result is
      ``'False'`` with the placeholder ``'Suspicous HKID'``. If neither is
      ``'True'``, the validity is ``'False'`` and the HKID is the shared
      value when both agree, else the same placeholder.
    * issuedate: the shared value, or ``'Unmatched issuedate'``.

    Both inputs are echoed to standard output before merging.
    """
    print(info1)
    print(info2)

    name1, name2 = info1[0], info2[0]
    if name1 == name2:
        name = name1  # identical in both models
    elif name1 == '':
        name = name2
    elif name2 == '':
        name = name1
    else:
        name = longest_common_subsequence(name1, name2)

    first_valid = info1[1] == 'True'
    second_valid = info2[1] == 'True'
    same_hkid = info1[2] == info2[2]
    if first_valid and not second_valid:
        validity, hkid = info1[1], info1[2]
    elif second_valid and not first_valid:
        validity, hkid = info2[1], info2[2]
    elif first_valid and same_hkid:
        validity, hkid = info1[1], info1[2]
    elif first_valid:
        # Both claim a valid HKID but disagree on which one.
        validity, hkid = 'False', 'Suspicous HKID'
    else:
        # Neither input is valid: still emit both fields so the result
        # keeps its four-item shape.
        validity = 'False'
        hkid = info1[2] if same_hkid else 'Suspicous HKID'

    issuedate = info1[3] if info1[3] == info2[3] else 'Unmatched issuedate'

    return [name, validity, hkid, issuedate]
| # info1 = ['', 'True', 'Z683365(5)', '06-96'] | |
| # info2 = ['lok wing', 'False', 'Z68336505)', '06-96'] | |
| # info = combine_info(info1, info2) | |
| # print_info(*info) | |
| # text = 'TAMKing Man' | |
| # if is_comma_present(text): | |
| # text = text.replace(',', '') | |
| # if not check_integer(text): | |
| # if check_alpha(text) and is_chinese_name(text): | |
| # name = seperate_name(text) |