debugVarTool/parse_xml/Src/parse_xml.py

353 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# pyinstaller --onefile --distpath . --workpath ./build --specpath ./build parse_xml.py
# python -m nuitka --standalone --onefile --output-dir=./build parse_xml.py
import xml.etree.ElementTree as ET
import xml.dom.minidom
import sys
import os
# Command-line handling: two required paths plus an optional output path.
if len(sys.argv) < 3:
    # Show the name the tool was actually started with (plain script or a
    # frozen executable) rather than a hard-coded name of another script.
    _prog_name = os.path.basename(sys.argv[0]) or "parse_xml.py"
    print(f"Usage: {_prog_name} <input.xml> <info.txt> [output.xml]")
    sys.exit(1)

input_path = sys.argv[1]
info_path = sys.argv[2]
if len(sys.argv) >= 4:
    output_path = sys.argv[3]
else:
    # Default: write "simplified.xml" into the directory of the input file.
    input_dir = os.path.dirname(os.path.abspath(input_path))
    output_path = os.path.join(input_dir, "simplified.xml")

# Parse the whole input document once; the code below works on `root`.
tree = ET.parse(input_path)
root = tree.getroot()
def extract_timestamp(info_path):
    """Return the text that follows the first "Time Stamp:" marker in a file.

    Args:
        info_path: Path of the text file to scan line by line.

    Returns:
        The text between the first "Time Stamp:" marker on the first matching
        line and the next marker (or the end of that line), stripped of
        surrounding whitespace. None when no line contains the marker.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    marker = "Time Stamp:"
    # errors="replace": only one ASCII marker line is needed, so a stray
    # non-UTF-8 byte elsewhere in the file must not abort the whole run.
    with open(info_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            if marker in line:
                return line.split(marker)[1].strip()
    return None
# Index of every <die> element that carries an "id" attribute, keyed by that
# id, so references (idref) can be looked up directly.
die_by_id = {
    node.attrib.get("id"): node
    for node in root.iter("die")
    if "id" in node.attrib
}
def get_attr(die, attr_type):
    """Return the <value> child of the first matching <attribute> of a DIE.

    An <attribute> child matches when it has a <type> child whose text equals
    ``attr_type``.

    Args:
        die: Element whose direct <attribute> children are searched.
        attr_type: Attribute type text to look for, e.g. "DW_AT_name".

    Returns:
        The <value> child of the first matching attribute (None if that
        attribute has no <value>), or None when no attribute matches.
    """

    def _matches(attribute):
        kind = attribute.find("type")
        return kind is not None and kind.text == attr_type

    hit = next(filter(_matches, die.findall("attribute")), None)
    return None if hit is None else hit.find("value")
def get_die_size(die):
    """Return the size of a DIE in bytes, read from DW_AT_byte_size.

    Args:
        die: Element whose direct <attribute> children are searched.

    Returns:
        The integer parsed (base auto-detected from the prefix) from the
        <value>/<const> text of the first DW_AT_byte_size attribute that has
        such a constant, or None when there is none.
    """
    for candidate in die.findall("attribute"):
        kind = candidate.find("type")
        if kind is None or kind.text != "DW_AT_byte_size":
            continue
        size_node = candidate.find("value/const")
        if size_node is None:
            # Keep looking: a later attribute may still carry the constant.
            continue
        return int(size_node.text, 0)
    return None
def resolve_type_die(type_id):
    """Return the type DIE behind typedef, const, volatile and far wrappers.

    Starting at ``type_id``, follows DW_AT_type references through
    DW_TAG_volatile_type, DW_TAG_const_type, DW_TAG_typedef and
    DW_TAG_TI_far_type DIEs until a DIE with any other tag is reached.

    Args:
        type_id: Id of the DIE to start from (a key of ``die_by_id``).

    Returns:
        The first DIE in the chain that is not one of the wrapper tags, or
        None when the id is empty or unknown, a wrapper has no type
        reference, or the chain loops back on itself.
    """
    transparent_tags = {
        "DW_TAG_volatile_type",
        "DW_TAG_const_type",
        "DW_TAG_typedef",
        "DW_TAG_TI_far_type",
    }
    seen = set()
    current_id = type_id
    while True:
        # An empty id or an id already visited (reference cycle) ends the walk.
        if not current_id or current_id in seen:
            return None
        seen.add(current_id)

        candidate = die_by_id.get(current_id)
        if candidate is None:
            return None
        if candidate.findtext("tag") not in transparent_tags:
            return candidate

        type_value = get_attr(candidate, "DW_AT_type")
        target = None if type_value is None else type_value.find("ref")
        if target is None:
            return None
        current_id = target.attrib.get("idref")
# Lookup table of simple type descriptions keyed by DIE tag (example).
def _base_type_name(die):
    """Return the DW_AT_name string of a DIE, or "unknown" if it has none."""
    # `type` is a child element of <attribute> (not an XML attribute), so the
    # predicate must be [type='...'] rather than [@type='...'].
    name_elem = die.find("attribute[type='DW_AT_name']/value/string")
    return name_elem.text if name_elem is not None else "unknown"


base_types_map = {
    "DW_TAG_base_type": _base_type_name,
    "DW_TAG_structure_type": lambda die: "struct",
    "DW_TAG_union_type": lambda die: "union",
    "DW_TAG_pointer_type": lambda die: "pointer",
    "DW_TAG_array_type": lambda die: "array",
}
def get_type_name(type_id):
    """Build a C-like type name for the type referenced by ``type_id``.

    The id is first passed through ``resolve_type_die``. The result depends on
    the tag of the resolved DIE:

    * pointer: pointee name followed by "*"; "void*" when the pointer has no
      type reference; "unknown" when the pointee name is "unknown".
    * base type: its DW_AT_name string, or "base_type_unknown".
    * struct / union / enum: "struct NAME", "union NAME", "enum NAME", with
      an "anonymous_..." placeholder when DW_AT_name is absent.
    * array: element name followed by "[]", or "array[]" without a type
      reference.
    * anything else, or an unresolvable id: "unknown".

    Args:
        type_id: Id of the type DIE to describe.

    Returns:
        The type name as described above.
    """
    resolved = resolve_type_die(type_id)
    if resolved is None:
        return "unknown"
    kind = resolved.findtext("tag")

    def referenced_type():
        # <ref> element of this DIE's DW_AT_type attribute, or None.
        value = get_attr(resolved, "DW_AT_type")
        return None if value is None else value.find("ref")

    if kind == "DW_TAG_pointer_type":
        target = referenced_type()
        if target is None:
            return "void*"
        pointee = get_type_name(target.attrib.get("idref"))
        return pointee if pointee == "unknown" else pointee + "*"

    if kind == "DW_TAG_array_type":
        target = referenced_type()
        if target is None:
            return "array[]"
        element_name = get_type_name(target.attrib.get("idref"))
        return f"{element_name}[]"

    if kind == "DW_TAG_base_type":
        name_value = get_attr(resolved, "DW_AT_name")
        if name_value is None:
            return "base_type_unknown"
        return name_value.findtext("string")

    # struct, union and enum share one naming rule: keyword + name/placeholder.
    named_kinds = {
        "DW_TAG_structure_type": ("struct", "anonymous_struct"),
        "DW_TAG_union_type": ("union", "anonymous_union"),
        "DW_TAG_enumeration_type": ("enum", "anonymous_enum"),
    }
    if kind in named_kinds:
        keyword, placeholder = named_kinds[kind]
        name_value = get_attr(resolved, "DW_AT_name")
        label = placeholder if name_value is None else name_value.findtext("string")
        return f"{keyword} {label}"

    return "unknown"
def parse_offset(offset_text):
    """Extract the numeric operand of a "DW_OP_plus_uconst N" expression.

    Args:
        offset_text: Location expression text, or None / empty.

    Returns:
        The last whitespace-separated token parsed as an integer (base
        auto-detected from its prefix) when the text starts with
        "DW_OP_plus_uconst "; 0 for empty input or any other text.
    """
    prefix = "DW_OP_plus_uconst "
    if not offset_text or not offset_text.startswith(prefix):
        return 0
    *_, operand = offset_text.split()
    return int(operand, 0)
def get_array_dimensions(array_die):
    """Return the sizes of all dimensions of a DW_TAG_array_type DIE.

    Every DW_TAG_subrange_type child contributes one dimension whose size is
    its DW_AT_upper_bound constant plus one (bounds are zero-based). A
    dimension without a usable bound is derived from the total element count
    (array byte size // element byte size) when it is the only unknown one;
    otherwise it is reported as 0. An array DIE without any subrange child is
    treated as having a single dimension. If the element type itself
    resolves to an array type, its dimensions are appended recursively.

    Args:
        array_die: DIE with tag DW_TAG_array_type.

    Returns:
        List of dimension sizes, outermost first; 0 marks an unknown size.
    """
    # Resolve the element type once; it is needed both for the size-based
    # fallback and for the nested-array recursion.
    element_die = None
    type_value = get_attr(array_die, "DW_AT_type")
    type_ref = type_value.find("ref") if type_value is not None else None
    if type_ref is not None:
        element_die = resolve_type_die(type_ref.attrib.get("idref"))

    dims = []
    for child in array_die.findall("die"):
        if child.findtext("tag") != "DW_TAG_subrange_type":
            continue
        length = None
        bound_value = get_attr(child, "DW_AT_upper_bound")
        if bound_value is not None:
            # get_attr already returns the <value> element, so the constant
            # is its direct <const> child.
            const = bound_value.find("const")
            if const is not None and const.text:
                length = int(const.text, 0) + 1
        dims.append(length)
    if not dims:
        # No subrange information at all: still report one dimension.
        dims.append(None)

    unknown = [index for index, length in enumerate(dims) if length is None]
    if unknown:
        # Fallback: total element count from byte sizes.
        total = None
        array_size = get_die_size(array_die)
        element_size = get_die_size(element_die) if element_die is not None else None
        if array_size is not None and element_size:
            total = array_size // element_size

        known_product = 1
        for length in dims:
            if length is not None:
                known_product *= length

        # Only a single missing dimension can be recovered from the total.
        if len(unknown) == 1 and total is not None and known_product:
            dims[unknown[0]] = total // known_product
        dims = [0 if length is None else length for length in dims]  # 0 = unknown

    # An element type that is itself an array adds further dimensions.
    if element_die is not None and element_die.findtext("tag") == "DW_TAG_array_type":
        dims.extend(get_array_dimensions(element_die))
    return dims
def handle_array_type(member_elem, resolved_type, offset=0):
    """Annotate an output element that describes an array variable or member.

    Sets on ``member_elem``: "type" (innermost element type name followed by
    one "[]" per dimension), "size" (DW_AT_byte_size of the array as a
    decimal string, when present), "size1", "size2", ... (one per dimension)
    and "kind" = "array". When the innermost element type is a structure or
    a union, its members are added below ``member_elem`` as well.

    Args:
        member_elem: Output XML element to annotate in place.
        resolved_type: DIE with tag DW_TAG_array_type.
        offset: Base offset handed on to ``add_members_recursive`` for the
            members of an aggregate element type.
    """
    dims = get_array_dimensions(resolved_type)

    def innermost_element_type(die):
        # Follow DW_AT_type through nested array types down to the first
        # non-array element type; None when a reference is missing or
        # cannot be resolved.
        while True:
            value = get_attr(die, "DW_AT_type")
            target = value.find("ref") if value is not None else None
            if target is None:
                return None
            die = resolve_type_die(target.attrib.get("idref"))
            if die is None or die.findtext("tag") != "DW_TAG_array_type":
                return die

    element_type_die = innermost_element_type(resolved_type)
    if element_type_die is not None:
        element_type_name = get_type_name(element_type_die.attrib.get("id"))
    else:
        element_type_name = "unknown"

    # Type string with one "[]" per dimension.
    member_elem.set("type", element_type_name + "[]" * len(dims))

    # Size of the whole array.
    arr_size = get_die_size(resolved_type)
    if arr_size is not None:
        member_elem.set("size", str(arr_size))

    # Per-dimension attributes size1, size2, ...
    for position, dim in enumerate(dims, 1):
        member_elem.set(f"size{position}", str(dim))

    member_elem.set("kind", "array")

    # Aggregate element types get their members listed too. Unions are
    # handled the same way as structures, matching how plain (non-array)
    # variables and members of these types are treated elsewhere in the file.
    if element_type_die is not None and element_type_die.findtext("tag") in (
        "DW_TAG_structure_type",
        "DW_TAG_union_type",
    ):
        add_members_recursive(member_elem, element_type_die, offset)
def add_members_recursive(parent_elem, struct_die, base_offset=0):
    """Add a <member> element to ``parent_elem`` for each member of a DIE.

    Sets ``parent_elem``'s "size" attribute to the hex DW_AT_byte_size of
    ``struct_die`` (when present), then walks its DW_TAG_member children.
    Each member gets "name", "offset" (hex, relative to ``base_offset``) and
    "type"; members of a union additionally get kind="union". Array members
    are completed by ``handle_array_type``; structure and union members are
    expanded recursively.

    Members are skipped when they lack a name or a type reference, when
    their type name is "unknown", or (for non-union parents) when they lack
    DW_AT_data_member_location.

    Args:
        parent_elem: Output XML element that receives the members.
        struct_die: DIE of the structure or union being expanded.
        base_offset: Offset added to every member offset.
    """
    is_union = struct_die.findtext("tag") == "DW_TAG_union_type"

    # Size of the structure/union itself.
    size = get_die_size(struct_die)
    if size is not None:
        parent_elem.set("size", hex(size))

    for member in struct_die.findall("die"):
        if member.findtext("tag") != "DW_TAG_member":
            continue

        name_attr = get_attr(member, "DW_AT_name")
        offset_attr = get_attr(member, "DW_AT_data_member_location")
        type_attr = get_attr(member, "DW_AT_type")
        if name_attr is None or type_attr is None:
            continue
        # Union members all start at the beginning of the union, so a missing
        # location means offset 0 there; for structures it stays mandatory.
        if offset_attr is None and not is_union:
            continue

        name = name_attr.findtext("string")
        type_ref = type_attr.find("ref")
        # Without a name string the element cannot be serialized, and without
        # a <ref> there is no type to describe.
        if name is None or type_ref is None:
            continue

        offset_text = offset_attr.findtext("block") if offset_attr is not None else None
        offset = parse_offset(offset_text) + base_offset

        type_id = type_ref.attrib.get("idref")
        type_name = get_type_name(type_id)
        if type_name == "unknown":
            continue
        resolved_type = resolve_type_die(type_id)

        member_elem = ET.SubElement(
            parent_elem, "member", name=name, offset=hex(offset), type=type_name
        )
        if is_union:
            member_elem.set("kind", "union")

        if resolved_type is None:
            continue
        subtag = resolved_type.findtext("tag")
        if subtag == "DW_TAG_array_type":
            # Array: handle_array_type rewrites type/size/kind as needed.
            handle_array_type(member_elem, resolved_type, offset)
        elif subtag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            # Nested aggregate: list its members below this one.
            add_members_recursive(member_elem, resolved_type, offset)
# Build the simplified tree: one <variable> per DW_TAG_variable DIE that has
# a name, a fixed address and a describable type.
output_root = ET.Element("variables")
for die in root.iter("die"):
    if die.findtext("tag") != "DW_TAG_variable":
        continue

    name_attr = get_attr(die, "DW_AT_name")
    addr_attr = get_attr(die, "DW_AT_location")
    type_attr = get_attr(die, "DW_AT_type")
    if name_attr is None or addr_attr is None or type_attr is None:
        continue

    name = name_attr.findtext("string")
    # Skip variables without a name string or with '$' in the name.
    if name is None or "$" in name:
        continue

    # Only locations of the form "DW_OP_addr <address>" are supported.
    addr_text = addr_attr.findtext("block")
    if not addr_text or not addr_text.startswith("DW_OP_addr "):
        continue
    addr = int(addr_text.split()[-1], 0)

    # Skip variables located in peripheral memory.
    if 0x800 <= addr < 0x8000:
        continue

    # A type attribute without a <ref> child cannot be resolved.
    type_ref = type_attr.find("ref")
    if type_ref is None:
        continue
    type_id = type_ref.attrib.get("idref")

    # Skip variables whose type get_type_name cannot describe
    # (e.g. DW_TAG_subroutine_type).
    type_name = get_type_name(type_id)
    if type_name == "unknown":
        continue
    resolved_type = resolve_type_die(type_id)

    var_elem = ET.SubElement(
        output_root, "variable", name=name, address=hex(addr), type=type_name
    )
    if resolved_type is not None:
        tag = resolved_type.findtext("tag")
        if tag == "DW_TAG_array_type":
            handle_array_type(var_elem, resolved_type)
        elif tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            add_members_recursive(var_elem, resolved_type)
timestamp = extract_timestamp(info_path)

# Put a <timestamp> element at the front of the output tree; its text stays
# empty when no timestamp was found.
timestamp_elem = ET.Element("timestamp")
timestamp_elem.text = timestamp
output_root.insert(0, timestamp_elem)

# Pretty-print through minidom and write the result.
rough_string = ET.tostring(output_root, encoding="utf-8")
reparsed = xml.dom.minidom.parseString(rough_string)
pretty_xml = reparsed.toprettyxml(indent=" ")
with open(output_path, "w", encoding="utf-8") as f:
    f.write(pretty_xml)

# Remove the consumed input files, but never the file that was just written
# (the output path may have been given as one of the input paths).
for _consumed_path in (input_path, info_path):
    if not os.path.samefile(_consumed_path, output_path):
        os.remove(_consumed_path)

print(f"Simplified and formatted XML saved to: {output_path}")