# Build commands:
#   pyinstaller --onefile --distpath ./parse_xml --workpath ./parse_xml/build --specpath ./build parse_xml/Src/parse_xml.py
#   python -m nuitka --standalone --onefile --output-dir=./parse_xml parse_xml/Src/parse_xml.py
"""Simplify a DWARF debug-info XML dump into a compact list of variables.

The input XML is expected to contain nested ``<die>`` elements, each with a
``<tag>`` child and ``<attribute>`` children of the form::

    <attribute><type>DW_AT_xxx</type><value>...</value></attribute>

For every ``DW_TAG_variable`` DIE with a static address the script emits a
``<variable>`` element (name, address, type) and, for structures, unions and
arrays, nested ``<member>`` elements.  A ``<timestamp>`` element taken from the
"Time Stamp:" line of the info file is inserted first.

Usage: parse_xml <input.xml> <info_file> [output.xml]

NOTE: both input files are deleted after the output has been written.
"""

import os
import re
import sys
import xml.dom.minidom
import xml.etree.ElementTree as ET

# Fallback sizes (in bytes) for base types whose DIE has no DW_AT_byte_size.
# NOTE(review): char == int == 2 bytes looks like a 16-bit-word target
# (presumably a TI DSP, given DW_TAG_TI_far_type below) - confirm.
base_type_sizes = {
    "char": 2,
    "short": 2,
    "int": 2,
    "long": 4,
    "long long": 8,
    "float": 4,
    "double": 8,
}

# Tags that merely wrap another type and are skipped when resolving a type.
_TRANSPARENT_TAGS = (
    "DW_TAG_volatile_type",
    "DW_TAG_const_type",
    "DW_TAG_typedef",
    "DW_TAG_TI_far_type",
)

_AGGREGATE_TAGS = ("DW_TAG_structure_type", "DW_TAG_union_type")

# Variables whose address falls in [start, end) are not exported.
# NOTE(review): the meaning of this window is not visible in this file -
# presumably a memory region that must be excluded; confirm.
_EXCLUDED_ADDR_START = 0x800
_EXCLUDED_ADDR_END = 0x8000

# Map "id" attribute -> <die> element.  Filled by main() (or by a caller that
# uses the helpers as a library) before any type is resolved.
die_by_id = {}


def extract_timestamp(info_path):
    """Return the text following "Time Stamp:" in the info file, or None."""
    with open(info_path, "r", encoding="utf-8") as f:
        for line in f:
            if "Time Stamp:" in line:
                return line.split("Time Stamp:")[1].strip()
    return None


def get_attr(die, attr_type):
    """Return the <value> element of the DIE attribute named *attr_type*.

    Returns None when the DIE has no such attribute (or the attribute has no
    <value> child).
    """
    for attr in die.findall("attribute"):
        type_elem = attr.find("type")
        if type_elem is not None and type_elem.text == attr_type:
            return attr.find("value")
    return None


def _find_ref(value_elem):
    """Return the <ref> child of an attribute <value>, or None."""
    if value_elem is None:
        return None
    return value_elem.find("ref")


def _parse_int(text):
    """Parse a decimal/hex/octal literal; return None when it is not a number."""
    try:
        return int(text, 0)
    except (TypeError, ValueError):
        return None


def _die_name(die, fallback):
    """Return the DW_AT_name string of *die*, or *fallback* when missing."""
    value = get_attr(die, "DW_AT_name")
    if value is None:
        return fallback
    name = value.findtext("string")
    return name if name is not None else fallback


def get_die_size(die):
    """Return the size of a DIE in bytes, or None when it cannot be determined.

    DW_AT_byte_size is used when present.  Otherwise, for base types only, the
    size is guessed from the keywords of ``base_type_sizes`` found in the type
    name.  Keywords are matched as whole words, longest first, so that
    "long long" is not mistaken for "long" and "point_t" does not match "int".
    """
    size_value = get_attr(die, "DW_AT_byte_size")
    if size_value is not None:
        const_elem = size_value.find("const")
        if const_elem is not None:
            return int(const_elem.text, 0)

    if die.findtext("tag") != "DW_TAG_base_type":
        return None
    type_name = _die_name(die, None)
    if not type_name:
        return None
    lowered = type_name.lower()
    for key in sorted(base_type_sizes, key=len, reverse=True):
        if re.search(r"\b" + re.escape(key) + r"\b", lowered):
            return base_type_sizes[key]
    return None


def resolve_type_die(type_id):
    """Return the DIE of a type, looking through typedef/const/volatile/far.

    Returns None when the id is unknown, the chain ends without a concrete
    type, or the chain is cyclic.
    """
    visited = set()
    while type_id and type_id not in visited:
        visited.add(type_id)
        die = die_by_id.get(type_id)
        if die is None:
            return None
        if die.findtext("tag") not in _TRANSPARENT_TAGS:
            return die
        ref = _find_ref(get_attr(die, "DW_AT_type"))
        if ref is None:
            return None
        type_id = ref.attrib.get("idref")
    return None


# Simple type labels by tag (example).
base_types_map = {
    "DW_TAG_base_type": lambda die: _die_name(die, "unknown"),
    "DW_TAG_structure_type": lambda die: "struct",
    "DW_TAG_union_type": lambda die: "union",
    "DW_TAG_pointer_type": lambda die: "pointer",
    "DW_TAG_array_type": lambda die: "array",
}


def get_type_name(type_id):
    """Return a C-like name for the type with the given DIE id.

    Returns "unknown" for unresolved or unsupported types; a pointer to an
    unknown type is also reported as "unknown".
    """
    die = resolve_type_die(type_id)
    if die is None:
        return "unknown"

    tag = die.findtext("tag")
    if tag == "DW_TAG_pointer_type":
        ref = _find_ref(get_attr(die, "DW_AT_type"))
        if ref is None:
            # A pointer without a target type is a void pointer.
            return "void*"
        name = get_type_name(ref.attrib.get("idref"))
        return name + "*" if name != "unknown" else name
    if tag == "DW_TAG_base_type":
        return _die_name(die, "base_type_unknown")
    if tag == "DW_TAG_structure_type":
        return f"struct {_die_name(die, 'anonymous_struct')}"
    if tag == "DW_TAG_union_type":
        return f"union {_die_name(die, 'anonymous_union')}"
    if tag == "DW_TAG_array_type":
        ref = _find_ref(get_attr(die, "DW_AT_type"))
        if ref is None:
            return "array[]"
        return f"{get_type_name(ref.attrib.get('idref'))}[]"
    if tag == "DW_TAG_enumeration_type":
        return f"enum {_die_name(die, 'anonymous_enum')}"
    return "unknown"


def parse_offset(offset_text):
    """Return the constant of a "DW_OP_plus_uconst N" expression, else 0."""
    if offset_text and offset_text.startswith("DW_OP_plus_uconst "):
        return int(offset_text.split()[-1], 0)
    return 0


def get_base_type_die(array_die):
    """Follow DW_AT_type through nested arrays and return the element DIE.

    When the chain cannot be followed any further, the last array DIE that
    was reached is returned instead.
    """
    current_die = array_die
    while True:
        ref = _find_ref(get_attr(current_die, "DW_AT_type"))
        if ref is None:
            break
        next_die = resolve_type_die(ref.attrib.get("idref"))
        if next_die is None:
            break
        if next_die.findtext("tag") != "DW_TAG_array_type":
            return next_die
        current_die = next_die
    return current_die


def _subrange_size(subrange_die):
    """Return the element count of one DW_TAG_subrange_type DIE, or None.

    DW_AT_upper_bound (constant, or a "DW_OP_plus_uconst N" block) is tried
    first; DW_AT_count is the fallback.
    """
    upper_value = get_attr(subrange_die, "DW_AT_upper_bound")
    if upper_value is not None:
        const_elem = upper_value.find("const")
        if const_elem is not None:
            upper = _parse_int(const_elem.text)
            if upper is not None:
                return upper + 1
        else:
            block_elem = upper_value.find("block")
            block_text = block_elem.text if block_elem is not None else None
            if block_text and "DW_OP_plus_uconst" in block_text:
                upper = _parse_int(block_text.split()[-1])
                if upper is not None:
                    return upper + 1

    count_value = get_attr(subrange_die, "DW_AT_count")
    if count_value is not None:
        # get_attr() already returns the <value> element, so the constant is
        # a direct child of it.
        const_elem = count_value.find("const")
        if const_elem is not None:
            return _parse_int(const_elem.text)
    return None


def get_array_dimensions(array_die):
    """Return the list of dimension sizes of an array DIE, outermost first.

    A dimension whose size cannot be determined is reported as 0.  When the
    array has no subrange children, a single dimension is derived from
    total size / element size.  Dimensions of nested array types are appended.
    """
    dims = []
    for child in array_die.findall("die"):
        if child.findtext("tag") != "DW_TAG_subrange_type":
            continue
        dim_size = _subrange_size(child)
        if dim_size is None:
            print("[DEBUG] No dimension size found for this subrange, defaulting to 0")
            dim_size = 0
        dims.append(dim_size)

    element_die = None
    element_ref = _find_ref(get_attr(array_die, "DW_AT_type"))
    if element_ref is not None:
        element_die = resolve_type_die(element_ref.attrib.get("idref"))

    if not dims:
        # No subranges: fall back to total size divided by element size.
        arr_size = get_die_size(array_die)
        elem_size = get_die_size(element_die) if element_die is not None else None
        if arr_size is not None and elem_size:
            dims.append(arr_size // elem_size)
        else:
            dims.append(0)
            print("[DEBUG] Could not calculate dimension size, set 0")

    if element_die is not None and element_die.findtext("tag") == "DW_TAG_array_type":
        dims.extend(get_array_dimensions(element_die))
    return dims


def handle_array_type(member_elem, resolved_type, offset=0, _active=None):
    """Describe an array on *member_elem*.

    Sets ``type`` (element type followed by one "[]" per dimension), ``size``,
    ``size1..sizeN`` (one per dimension) and ``kind="array"``.  For arrays of
    structures the structure members are added below *member_elem*.

    ``_active`` is internal: the set of aggregates currently being expanded,
    used by add_members_recursive() to stop on self-referential types.
    """
    dims = get_array_dimensions(resolved_type)
    base_die = get_base_type_die(resolved_type)

    base_name = "unknown"
    base_size = 0
    if base_die is not None:
        base_id = base_die.attrib.get("id")
        if base_id:
            base_name = get_type_name(base_id)
            base_size = get_die_size(base_die) or 0

    member_elem.set("type", base_name + "[]" * len(dims))

    total_elements = 1
    for dim in dims:
        if dim == 0:
            total_elements = 0
            print("[WARN] Dimension size is zero, setting total elements to 0")
            break
        total_elements *= dim

    if total_elements * base_size:
        # NOTE(review): "size" holds the ELEMENT size here (decimal), while the
        # fallback below stores the whole array size - kept as in the original
        # because consumers of the output are not visible from this file.
        member_elem.set("size", str(base_size))
    else:
        arr_size = get_die_size(resolved_type)
        if arr_size:
            member_elem.set("size", str(arr_size))
        else:
            print("[WARN] Could not determine total size for array")

    for index, dim in enumerate(dims, 1):
        member_elem.set(f"size{index}", str(dim))
    member_elem.set("kind", "array")

    # NOTE(review): only arrays of structures are expanded, not arrays of
    # unions - kept as in the original.
    if base_die is not None and base_die.findtext("tag") == "DW_TAG_structure_type":
        add_members_recursive(member_elem, base_die, offset, _active)


def _add_pointee(member_elem, pointer_die, active):
    """Add a <pointee> child describing the struct/union a pointer refers to."""
    pointee_ref = _find_ref(get_attr(pointer_die, "DW_AT_type"))
    if pointee_ref is None:
        return
    pointee_id = pointee_ref.attrib.get("idref")
    pointee_die = resolve_type_die(pointee_id)
    if pointee_die is None or pointee_die.findtext("tag") not in _AGGREGATE_TAGS:
        return
    pointee_elem = ET.SubElement(member_elem, "pointee", type=get_type_name(pointee_id))
    # Offsets inside a pointee are relative to the pointed-to object.
    add_members_recursive(pointee_elem, pointee_die, 0, active)


def _add_member(parent_elem, member, base_offset, is_union, active):
    """Emit one <member> element for a DW_TAG_member DIE (if it is usable)."""
    if member.findtext("tag") != "DW_TAG_member":
        return
    name_attr = get_attr(member, "DW_AT_name")
    offset_attr = get_attr(member, "DW_AT_data_member_location")
    type_ref = _find_ref(get_attr(member, "DW_AT_type"))
    if name_attr is None or offset_attr is None or type_ref is None:
        return
    name = name_attr.findtext("string")
    if name is None:
        return

    offset = parse_offset(offset_attr.findtext("block")) + base_offset
    type_id = type_ref.attrib.get("idref")
    resolved_type = resolve_type_die(type_id)
    type_name = get_type_name(type_id)
    if type_name == "unknown":
        return

    member_elem = ET.SubElement(
        parent_elem, "member", name=name, offset=hex(offset), type=type_name
    )
    if is_union:
        member_elem.set("kind", "union")
    if resolved_type is None:
        return

    tag = resolved_type.findtext("tag")
    if tag == "DW_TAG_array_type":
        handle_array_type(member_elem, resolved_type, offset, active)
    elif tag in _AGGREGATE_TAGS:
        add_members_recursive(member_elem, resolved_type, offset, active)
    elif tag == "DW_TAG_pointer_type":
        _add_pointee(member_elem, resolved_type, active)


def add_members_recursive(parent_elem, struct_die, base_offset=0, _active=None):
    """Add the members of a struct/union DIE below *parent_elem*.

    Sets ``size`` (hex) on *parent_elem* when the aggregate size is known and
    appends one <member> per named DW_TAG_member with a resolvable type.
    Member offsets are *base_offset* plus the member's own offset.

    ``_active`` is internal: the set of aggregates currently being expanded.
    An aggregate that is already being expanded higher up the call stack
    (e.g. ``struct node { struct node *next; }``) is not expanded again, which
    prevents unbounded recursion.
    """
    active = set() if _active is None else _active
    is_union = struct_die.findtext("tag") == "DW_TAG_union_type"

    size = get_die_size(struct_die)
    if size is not None:
        parent_elem.set("size", hex(size))

    key = id(struct_die)
    if key in active:
        return
    active.add(key)
    try:
        for member in struct_die.findall("die"):
            _add_member(parent_elem, member, base_offset, is_union, active)
    finally:
        active.discard(key)


def _collect_variables(root):
    """Build the <variables> tree for all exportable DW_TAG_variable DIEs."""
    output_root = ET.Element("variables")
    for die in root.iter("die"):
        if die.findtext("tag") != "DW_TAG_variable":
            continue
        name_attr = get_attr(die, "DW_AT_name")
        addr_attr = get_attr(die, "DW_AT_location")
        type_ref = _find_ref(get_attr(die, "DW_AT_type"))
        if name_attr is None or addr_attr is None or type_ref is None:
            continue

        name = name_attr.findtext("string")
        if name is None or "$" in name:
            continue

        # Only variables with a fixed address are exported.
        addr_text = addr_attr.findtext("block")
        if not addr_text or not addr_text.startswith("DW_OP_addr "):
            continue
        addr = int(addr_text.split()[-1], 0)

        type_id = type_ref.attrib.get("idref")
        resolved_type = resolve_type_die(type_id)
        type_name = get_type_name(type_id)
        if _EXCLUDED_ADDR_START <= addr < _EXCLUDED_ADDR_END or type_name == "unknown":
            continue

        var_elem = ET.SubElement(
            output_root, "variable", name=name, address=hex(addr), type=type_name
        )
        if resolved_type is None:
            continue
        tag = resolved_type.findtext("tag")
        if tag == "DW_TAG_array_type":
            handle_array_type(var_elem, resolved_type)
        elif tag in _AGGREGATE_TAGS:
            add_members_recursive(var_elem, resolved_type)
    return output_root


def main(argv=None):
    """Run the converter; *argv* defaults to ``sys.argv``.  Returns the exit code."""
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        print("Usage: python parse_xml.exe <input.xml> <info_file> [output.xml]")
        return 1

    input_path = args[1]
    info_path = args[2]
    if len(args) >= 4:
        output_path = args[3]
    else:
        input_dir = os.path.dirname(os.path.abspath(input_path))
        output_path = os.path.join(input_dir, "simplified.xml")

    root = ET.parse(input_path).getroot()
    die_by_id.clear()
    die_by_id.update(
        {die.attrib.get("id"): die for die in root.iter("die") if "id" in die.attrib}
    )

    output_root = _collect_variables(root)

    timestamp_elem = ET.Element("timestamp")
    timestamp_elem.text = extract_timestamp(info_path)
    output_root.insert(0, timestamp_elem)

    rough_string = ET.tostring(output_root, encoding="utf-8")
    pretty_xml = xml.dom.minidom.parseString(rough_string).toprettyxml(indent=" ")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml)

    # The inputs are intermediate files and are removed once the output exists.
    os.remove(input_path)
    os.remove(info_path)
    print(f"Simplified and formatted XML saved to: {output_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())