Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	| import argparse | |
| import os | |
| #from utils import * | |
| from typing import Dict, List | |
| from tqdm import tqdm | |
| import re | |
class ParseError(Exception):
    """Raised when decompiled source text cannot be parsed.

    Attributes:
        msg: Human-readable description of what failed to parse.
    """

    def __init__(self, msg):
        # Forward the message to Exception so str(err), err.args and
        # tracebacks carry it; previously only the .msg attribute was set.
        super().__init__(msg)
        self.msg = msg
# C preprocessor directive (with trailing newline) that includes defs.hh
# from an absolute, machine-specific path.
# NOTE(review): presumably prepended to decompiled code before it is handed
# to the clang-based parser -- confirm against the code that uses HEADER.
HEADER = '#include "/home/ReSym/clang-parser/defs.hh"\n'
def process_funname(raw_addr: str) -> str:
    """Reduce a decompiler function name to its bare identifier.

    ``'main'`` is returned unchanged; ``'sub_401220'`` becomes ``'401220'``.
    Any other name yields ``None``.
    """
    if raw_addr == 'main':
        return raw_addr
    found = re.search(r'^sub_([\w\d]+)$', raw_addr)
    return found.group(1) if found else None
def hex_to_decimal(hex_str: str) -> int:
    """Convert a bare hexadecimal string to an integer.

    The string may carry a leading ``-`` but no ``0x`` prefix. Input that is
    not made up solely of hex digits yields ``None``.
    """
    looks_like_hex = re.match(r'^-?[0-9a-fA-F]+$', hex_str) is not None
    return int(hex_str, 16) if looks_like_hex else None
def extract_comments(fun_content: List[str]) -> List[Dict]:
    """Collect variable declarations that carry a trailing ``//`` comment.

    Every line of ``fun_content`` is stripped and matched against the shape
    ``<type> <name>; // <comment>``; lines that do not match are ignored.

    Returns:
        One dict per matching line, in input order, with the keys ``name``,
        ``type``, ``comment``, ``array_size``, ``ptr_level``,
        ``rbp_offset_hex``, ``rbp_offset_dec`` and ``original_line``.
        ``array_size`` is set only for names of the form ``name[N]``; the two
        ``rbp_offset_*`` values are set only when the comment contains
        ``[rbp-<hex>h]``. Single and double quotes in ``comment`` and
        ``original_line`` are replaced with backticks.
    """
    decl_re = re.compile(r'^(.+?\s+\**)(\S+);\s+\/\/(.*)$')  # <type> <name>; // <comment>
    array_re = re.compile(r'^(.*?)\[(\d+)\]$')  # <name>[<size>]
    offset_re = re.compile(r'\[rbp(-[\d\w]+?)h\]')  # [rbp-<hex>h]

    def _to_backticks(text):
        # Swap both quote styles for backticks.
        return text.replace('"', "`").replace("'", '`')

    records = []
    for raw_line in fun_content:
        stripped = raw_line.strip()
        decl = decl_re.match(stripped)
        if decl is None:
            continue

        type_text, name_text, note = (part.strip() for part in decl.groups())

        # Split a trailing "[N]" off the declared name.
        size = None
        array_hit = array_re.match(name_text)
        if array_hit:
            name_text = array_hit.group(1)
            size = int(array_hit.group(2))

        # Pull the rbp-relative offset out of the comment, if present.
        offset_hit = offset_re.search(note)
        hex_offset = offset_hit.group(1) if offset_hit else None
        dec_offset = None if hex_offset is None else hex_to_decimal(hex_offset)

        records.append({
            'name': name_text.replace('*', ""),
            'type': type_text,
            'comment': _to_backticks(note.strip()),
            'array_size': size,
            # Stars counted here are those left inside the name token.
            'ptr_level': name_text.count("*"),
            'rbp_offset_hex': hex_offset,
            'rbp_offset_dec': dec_offset,
            'original_line': _to_backticks(stripped),
        })
    return records
def parse_signature(file_content: List[str], funname: str = None) -> List[Dict]:
    """Parse the argument list out of a function signature.

    The signature is searched for in the first three lines of
    ``file_content`` (or in all of them when there are fewer).

    Args:
        file_content: Lines of the function. A single string is also
            accepted and is split on newlines.
        funname: Function name to look for; it is interpolated into the
            search regex as-is (not escaped). When empty or ``None``, any
            ``sub_<id>`` name or ``main`` is accepted.

    Returns:
        One dict per argument, in declaration order, with the keys ``name``,
        ``type`` and ``original_line``. A variadic ``...`` entry carries only
        ``name`` and ``original_line``. ``void`` entries are skipped and an
        empty argument list yields ``[]``.

    Raises:
        ParseError: If no signature is found, an argument matches neither
            declaration pattern, or an argument name occurs twice.
    """
    arg_info = []
    if not funname:
        pattern = r'((sub_[\d\w]+)|main)\((.*?)\)'  # <g1> (<g3>)
    else:
        pattern = r'(({})|main)\((.*?)\)'.format(funname)  # <g1> (<g3>)
    if isinstance(file_content, str):
        file_content = file_content.split('\n')

    # Slice rather than index so inputs shorter than three lines end in
    # ParseError below instead of IndexError.
    arglist = None
    for line in file_content[:3]:
        match = re.search(pattern, line)
        if match:
            arglist = match.group(3)
            break
    if arglist is None:
        raise ParseError('Fail to parse the signature.')
    if not arglist:
        return arg_info

    # NOTE: this pattern also fires for any name that merely ends in
    # a<digits> (e.g. "data1" is split as "... dat" + "a1").
    arg_pattern = r'^(.*?)(a\d+)$'  # xxxx a1: <g1><g2>
    arg_pattern2 = r'^((struct\s|const\s)?\w+?\s+\*?)(\w+)$'  # (struct/const )?xxx *?<g3>

    # Names seen so far. The original tested `argname in arg_info`, which
    # compared a str against dicts and therefore could never be true.
    seen_names = set()
    for raw_arg in arglist.split(','):
        arg = raw_arg.strip()
        if arg == '...':
            arg_info.append({
                'name': arg,
                'original_line': arg,
            })
            continue
        if arg == 'void':
            continue

        arg_match = re.match(arg_pattern, arg)
        if arg_match:
            argtype, argname = arg_match.group(1).strip(), arg_match.group(2)
        else:
            arg_match = re.match(arg_pattern2, arg)
            if not arg_match:
                raise ParseError(f'Cannot find the declaration of argument {arg}.')
            argtype, argname = arg_match.group(1).strip(), arg_match.group(3)

        if argname in seen_names:
            raise ParseError(f'{argname} duplicate')
        seen_names.add(argname)
        arg_info.append({
            'name': argname,
            'type': argtype,
            'original_line': arg,
        })
    return arg_info