# parse_xml/Src/parse_xml.py
#
# NOTE: this file was recovered from a whitespace-collapsed git diff. Besides
# adding this script, the same diff appended `/parse_xml/build/` to .gitignore
# and added the binary parse_xml/parse_xml.exe.
#
# Build commands:
# pyinstaller --onefile --distpath . --workpath ./build --specpath ./build parse_xml.py
# python -m nuitka --standalone --onefile --output-dir=. --output-dir=./build parse_xml.py
"""Simplify a DWARF XML dump into a compact list of variables.

The input XML is expected to consist of nested ``<die>`` elements, each with
a ``<tag>`` child and ``<attribute>`` children of the form
``<attribute><type>DW_AT_x</type><value>...</value></attribute>``.

For every ``DW_TAG_variable`` that has a name, a ``DW_OP_addr`` location and
a describable type, a ``<variable>`` element is emitted; arrays get their
dimensions and structures/unions get their members expanded recursively.
A ``<timestamp>`` element taken from a separate info file is inserted first.

Command line: ``parse_xml.py INPUT_XML INFO_FILE [OUTPUT_XML]``.
"""
import os
import sys
import xml.dom.minidom
import xml.etree.ElementTree as ET

# Half-open address window [start, end) treated as peripheral memory;
# variables located inside it are left out of the output.
PERIPHERAL_START = 0x800
PERIPHERAL_END = 0x8000

# Tags that only wrap another type and are skipped when resolving a type.
_TRANSPARENT_TAGS = (
    "DW_TAG_volatile_type",
    "DW_TAG_const_type",
    "DW_TAG_typedef",
    "DW_TAG_TI_far_type",
)

# Maps a DIE "id" attribute to its <die> element. Filled by build_variables()
# and read by resolve_type_die().
die_by_id = {}


def extract_timestamp(info_path):
    """Return the text following "Time Stamp:" in the info file.

    The file is read as UTF-8 line by line; the first line containing the
    marker wins and the surrounding whitespace is stripped. Returns None when
    no line contains the marker.
    """
    with open(info_path, "r", encoding="utf-8") as f:
        for line in f:
            if "Time Stamp:" in line:
                return line.split("Time Stamp:")[1].strip()
    return None


def get_attr(die, attr_type):
    """Return the <value> element of the first attribute named *attr_type*.

    Returns None when the DIE has no such attribute (or the matching
    attribute has no <value> child).
    """
    for attr in die.findall("attribute"):
        type_elem = attr.find("type")
        if type_elem is not None and type_elem.text == attr_type:
            return attr.find("value")
    return None


def _type_ref(die):
    """Return the <ref> element of the DIE's DW_AT_type attribute, or None."""
    value = get_attr(die, "DW_AT_type")
    if value is None:
        return None
    return value.find("ref")


def _die_name(die, default):
    """Return the DW_AT_name string of *die*, or *default* when it is absent."""
    value = get_attr(die, "DW_AT_name")
    if value is None:
        return default
    name = value.findtext("string")
    return default if name is None else name


def get_die_size(die):
    """Return the DIE size in bytes from DW_AT_byte_size, or None if absent."""
    for attr in die.findall("attribute"):
        type_elem = attr.find("type")
        if type_elem is not None and type_elem.text == "DW_AT_byte_size":
            const_elem = attr.find("value/const")
            if const_elem is not None:
                # Base 0 accepts both "0x.." and decimal spellings.
                return int(const_elem.text, 0)
    return None


def resolve_type_die(type_id):
    """Return the type DIE for *type_id*, looking through wrapper types.

    typedef, const, volatile and TI "far" wrappers are followed via their
    DW_AT_type reference. Returns None for an unknown id, a wrapper without a
    target, or a reference cycle.
    """
    visited = set()
    while type_id and type_id not in visited:
        visited.add(type_id)
        die = die_by_id.get(type_id)
        if die is None:
            return None
        if die.findtext("tag") not in _TRANSPARENT_TAGS:
            return die
        ref = _type_ref(die)
        if ref is None:
            return None
        type_id = ref.attrib.get("idref")
    return None


# Simple per-tag type labels. Not referenced elsewhere in this file; kept for
# backward compatibility.
base_types_map = {
    "DW_TAG_base_type": lambda die: _die_name(die, "unknown"),
    "DW_TAG_structure_type": lambda die: "struct",
    "DW_TAG_union_type": lambda die: "union",
    "DW_TAG_pointer_type": lambda die: "pointer",
    "DW_TAG_array_type": lambda die: "array",
}


def get_type_name(type_id):
    """Return a C-like type name for *type_id*, or "unknown".

    Handles pointer, base, structure, union, array and enumeration types;
    every other tag (and any unresolved id) yields "unknown". A pointer to an
    "unknown" type is itself reported as "unknown"; a pointer without a
    DW_AT_type reference is reported as "void*".
    """
    die = resolve_type_die(type_id)
    if die is None:
        return "unknown"

    tag = die.findtext("tag")

    if tag == "DW_TAG_pointer_type":
        ref = _type_ref(die)
        if ref is None:
            return "void*"
        name = get_type_name(ref.attrib.get("idref"))
        return name + "*" if name != "unknown" else name

    if tag == "DW_TAG_base_type":
        return _die_name(die, "base_type_unknown")

    if tag == "DW_TAG_structure_type":
        return f"struct {_die_name(die, 'anonymous_struct')}"

    if tag == "DW_TAG_union_type":
        return f"union {_die_name(die, 'anonymous_union')}"

    if tag == "DW_TAG_array_type":
        ref = _type_ref(die)
        if ref is None:
            return "array[]"
        return f"{get_type_name(ref.attrib.get('idref'))}[]"

    if tag == "DW_TAG_enumeration_type":
        return f"enum {_die_name(die, 'anonymous_enum')}"

    return "unknown"


def parse_offset(offset_text):
    """Return the offset encoded as "DW_OP_plus_uconst N", or 0 otherwise."""
    if offset_text and offset_text.startswith("DW_OP_plus_uconst "):
        return int(offset_text.split()[-1], 0)
    return 0


def _subrange_length(subrange):
    """Return the element count of a DW_TAG_subrange_type DIE, or None.

    The count is DW_AT_upper_bound + 1 because the bound is the highest valid
    zero-based index.
    """
    upper = get_attr(subrange, "DW_AT_upper_bound")
    if upper is None:
        return None
    # get_attr() already returns the <value> element, so <const> is a direct
    # child of it.
    const_elem = upper.find("const")
    if const_elem is None or const_elem.text is None:
        return None
    return int(const_elem.text, 0) + 1


def get_array_dimensions(array_die):
    """Return the sizes of all dimensions of a DW_TAG_array_type DIE.

    Each DW_TAG_subrange_type child contributes one dimension, in document
    order. When a bound is missing (or there is no subrange child at all),
    that single dimension is derived from DW_AT_byte_size of the array
    divided by the element size; if that is impossible it is reported as 0.
    Dimensions of an element type that is itself an array are appended
    recursively.
    """
    dims = [
        _subrange_length(child)
        for child in array_die.findall("die")
        if child.findtext("tag") == "DW_TAG_subrange_type"
    ]
    if not dims:
        dims = [None]

    element_die = None
    element_ref = _type_ref(array_die)
    if element_ref is not None:
        element_die = resolve_type_die(element_ref.attrib.get("idref"))

    if None in dims:
        arr_size = get_die_size(array_die)
        elem_size = get_die_size(element_die) if element_die is not None else None
        if dims.count(None) == 1 and arr_size is not None and elem_size:
            known = 1
            for dim in dims:
                if dim is not None:
                    known *= dim
            if known:
                dims[dims.index(None)] = arr_size // elem_size // known
        # Anything still unresolved is reported as 0 (unknown).
        dims = [0 if dim is None else dim for dim in dims]

    if element_die is not None and element_die.findtext("tag") == "DW_TAG_array_type":
        dims.extend(get_array_dimensions(element_die))

    return dims


def handle_array_type(member_elem, resolved_type, offset=0):
    """Annotate *member_elem* as an array described by *resolved_type*.

    Sets "type" (element type name followed by one "[]" per dimension),
    "size" (decimal byte size of the whole array, when known), "size1",
    "size2", ... (one per dimension) and kind="array". If the innermost
    element type is a structure, its members are added with *offset* as the
    base offset.
    """
    dims = get_array_dimensions(resolved_type)

    def get_base_element_type(die):
        """Return the innermost non-array element type DIE, or None."""
        ref = _type_ref(die)
        if ref is None:
            return None
        type_die = resolve_type_die(ref.attrib.get("idref"))
        if type_die is not None and type_die.findtext("tag") == "DW_TAG_array_type":
            return get_base_element_type(type_die)
        return type_die

    element_type_die = get_base_element_type(resolved_type)
    if element_type_die is not None:
        element_type_name = get_type_name(element_type_die.attrib.get("id"))
    else:
        element_type_name = "unknown"

    member_elem.set("type", element_type_name + "[]" * len(dims))

    arr_size = get_die_size(resolved_type)
    if arr_size is not None:
        member_elem.set("size", str(arr_size))

    for i, dim in enumerate(dims, 1):
        member_elem.set(f"size{i}", str(dim))

    member_elem.set("kind", "array")

    # NOTE(review): add_members_recursive() overwrites the "size" attribute
    # set above with the element structure's size in hex. Left unchanged
    # because consumers of the output may rely on it; confirm the intent.
    if (
        element_type_die is not None
        and element_type_die.findtext("tag") == "DW_TAG_structure_type"
    ):
        add_members_recursive(member_elem, element_type_die, offset)


def add_members_recursive(parent_elem, struct_die, base_offset=0):
    """Append a <member> element to *parent_elem* for each struct/union member.

    Also sets parent_elem's "size" attribute to the hex byte size of
    *struct_die* when it is known. Members lacking a name, a
    DW_AT_data_member_location, a type reference, or a describable type are
    skipped. Member offsets are *base_offset* plus the member's own offset,
    written in hex. Members of a union get kind="union"; array members are
    delegated to handle_array_type(), nested structures/unions recurse.
    """
    is_union = struct_die.findtext("tag") == "DW_TAG_union_type"

    size = get_die_size(struct_die)
    if size is not None:
        parent_elem.set("size", hex(size))

    for member in struct_die.findall("die"):
        if member.findtext("tag") != "DW_TAG_member":
            continue

        name_attr = get_attr(member, "DW_AT_name")
        offset_attr = get_attr(member, "DW_AT_data_member_location")
        type_ref = _type_ref(member)
        if name_attr is None or offset_attr is None or type_ref is None:
            continue

        name = name_attr.findtext("string")
        if name is None:
            continue

        offset = parse_offset(offset_attr.findtext("block")) + base_offset
        type_id = type_ref.attrib.get("idref")
        resolved_type = resolve_type_die(type_id)
        type_name = get_type_name(type_id)
        if type_name == "unknown":
            continue

        member_elem = ET.SubElement(
            parent_elem, "member", name=name, offset=hex(offset), type=type_name
        )
        if is_union:
            member_elem.set("kind", "union")

        if resolved_type is None:
            continue

        subtag = resolved_type.findtext("tag")
        if subtag == "DW_TAG_array_type":
            handle_array_type(member_elem, resolved_type, offset)
        elif subtag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            add_members_recursive(member_elem, resolved_type, offset)


def build_variables(root):
    """Return a <variables> element describing the variables under *root*.

    Rebuilds the module-level die_by_id index from *root* first. A
    DW_TAG_variable is emitted only if it has a name without "$", a location
    of the form "DW_OP_addr N" outside the peripheral window, and a type that
    get_type_name() can describe.
    """
    die_by_id.clear()
    die_by_id.update(
        (die.attrib["id"], die) for die in root.iter("die") if "id" in die.attrib
    )

    output_root = ET.Element("variables")
    for die in root.iter("die"):
        if die.findtext("tag") != "DW_TAG_variable":
            continue

        name_attr = get_attr(die, "DW_AT_name")
        addr_attr = get_attr(die, "DW_AT_location")
        type_ref = _type_ref(die)
        if name_attr is None or addr_attr is None or type_ref is None:
            continue

        name = name_attr.findtext("string")
        # Skip unnamed variables and variables with '$' in the name.
        if name is None or "$" in name:
            continue

        addr_text = addr_attr.findtext("block")
        if not addr_text or not addr_text.startswith("DW_OP_addr "):
            continue
        addr = int(addr_text.split()[-1], 0)

        # Skip variables located in peripheral memory.
        if PERIPHERAL_START <= addr < PERIPHERAL_END:
            continue

        type_id = type_ref.attrib.get("idref")
        resolved_type = resolve_type_die(type_id)
        type_name = get_type_name(type_id)
        # Types that cannot be described (e.g. subroutine types) are skipped.
        if type_name == "unknown":
            continue

        var_elem = ET.SubElement(
            output_root, "variable", name=name, address=hex(addr), type=type_name
        )
        if resolved_type is None:
            continue

        tag = resolved_type.findtext("tag")
        if tag == "DW_TAG_array_type":
            handle_array_type(var_elem, resolved_type)
        elif tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            add_members_recursive(var_elem, resolved_type)

    return output_root


def main(argv=None):
    """Run the converter; return the process exit code.

    *argv* defaults to sys.argv. After the output is written the input XML is
    deleted, unless it is the same file as the output.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("Usage: python parse_xml.py INPUT_XML INFO_FILE [OUTPUT_XML]")
        return 1

    input_path = argv[1]
    info_path = argv[2]
    if len(argv) >= 4:
        output_path = argv[3]
    else:
        input_dir = os.path.dirname(os.path.abspath(input_path))
        output_path = os.path.join(input_dir, "simplified.xml")

    root = ET.parse(input_path).getroot()
    output_root = build_variables(root)

    # The timestamp goes first among the children of <variables>.
    timestamp_elem = ET.Element("timestamp")
    timestamp_elem.text = extract_timestamp(info_path)
    output_root.insert(0, timestamp_elem)

    # Pretty-print through minidom.
    rough_string = ET.tostring(output_root, encoding="utf-8")
    pretty_xml = xml.dom.minidom.parseString(rough_string).toprettyxml(indent=" ")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml)

    # The input dump is removed once converted; never remove the file that
    # was just written.
    if os.path.abspath(input_path) != os.path.abspath(output_path):
        os.remove(input_path)
    # os.remove(info_path)
    print(f"Simplified and formatted XML saved to: {output_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())