353 lines
13 KiB
Python
353 lines
13 KiB
Python
# pyinstaller --onefile --distpath . --workpath ./build --specpath ./build parse_xml.py
|
||
# python -m nuitka --standalone --onefile --output-dir=./build parse_xml.py
|
||
import xml.etree.ElementTree as ET
|
||
import xml.dom.minidom
|
||
import sys
|
||
import os
|
||
|
||
# --- Command-line handling and input loading -------------------------------
# Required arguments: the XML file to simplify and the info text file that is
# later scanned for a "Time Stamp:" line. An optional third argument names
# the output file.
if len(sys.argv) < 3:
    # Show the name the tool was actually started with. The previous text
    # hard-coded "python simplify_dwarf.py", which matches neither this
    # script nor the frozen executables built from it.
    script_name = os.path.basename(sys.argv[0]) or "parse_xml.py"
    print(f"Usage: {script_name} <input.xml> <info.txt> [output.xml]")
    sys.exit(1)

input_path = sys.argv[1]
info_path = sys.argv[2]

if len(sys.argv) >= 4:
    output_path = sys.argv[3]
else:
    # Default: write "simplified.xml" into the directory of the input file.
    input_dir = os.path.dirname(os.path.abspath(input_path))
    output_path = os.path.join(input_dir, "simplified.xml")

# Parse the whole input document; `root` is used by the rest of the script.
tree = ET.parse(input_path)
root = tree.getroot()
|
||
|
||
def extract_timestamp(info_path):
    """Return the text that follows "Time Stamp:" in the info file.

    The file is read line by line as UTF-8. For the first line containing
    the marker, the text located after the first marker occurrence (and
    before a second occurrence on the same line, if any) is returned with
    surrounding whitespace removed.

    Returns None when no line contains the marker.
    """
    marker = "Time Stamp:"
    with open(info_path, "r", encoding="utf-8") as info_file:
        for text_line in info_file:
            if marker not in text_line:
                continue
            # The marker is present, so the split always yields >= 2 parts.
            return text_line.split(marker)[1].strip()
    return None
|
||
|
||
|
||
|
||
# Index of every <die> element that carries an "id" attribute, keyed by that
# id, so that idref-style type references can be resolved by dictionary lookup.
die_by_id = dict(
    (node.attrib["id"], node)
    for node in root.iter("die")
    if "id" in node.attrib
)
|
||
|
||
def get_attr(die, attr_type):
    """Return the <value> element of the first attribute of the given type.

    Scans the direct <attribute> children of `die` and picks the first one
    whose <type> child has text equal to `attr_type`. The result is that
    attribute's <value> child, which may itself be None if the attribute has
    no <value>. Returns None when no attribute matches.
    """
    for candidate in die.findall("attribute"):
        kind = candidate.find("type")
        if kind is None or kind.text != attr_type:
            continue
        return candidate.find("value")
    return None
|
||
|
||
def get_die_size(die):
    """Return the size of a DIE in bytes, taken from DW_AT_byte_size.

    Looks through the direct <attribute> children of `die` for one whose
    <type> text is "DW_AT_byte_size" and that has a <value>/<const> child;
    the constant text is parsed with int(text, 0), so decimal and prefixed
    (for example 0x...) literals are accepted. Returns None when no such
    attribute is found.
    """
    for attribute in die.findall("attribute"):
        kind = attribute.find("type")
        if kind is None or kind.text != "DW_AT_byte_size":
            continue
        const_node = attribute.find("value/const")
        if const_node is None:
            # A size attribute without a constant: keep looking.
            continue
        return int(const_node.text, 0)
    return None
|
||
|
||
def resolve_type_die(type_id):
    """Return the DIE a type id ultimately refers to.

    Follows DW_AT_type references through typedef, const, volatile and
    DW_TAG_TI_far_type DIEs until a DIE with any other tag is reached, and
    returns that DIE. Returns None when the id is empty or unknown, when a
    pass-through DIE has no type reference, or when the chain of references
    loops back on itself.
    """
    pass_through_tags = (
        "DW_TAG_volatile_type",
        "DW_TAG_const_type",
        "DW_TAG_typedef",
        "DW_TAG_TI_far_type",
    )
    seen_ids = set()
    current_id = type_id
    while current_id and current_id not in seen_ids:
        seen_ids.add(current_id)

        candidate = die_by_id.get(current_id)
        if candidate is None:
            return None
        if candidate.findtext("tag") not in pass_through_tags:
            return candidate

        # Qualifier/typedef: hop to the type it wraps.
        target = get_attr(candidate, "DW_AT_type")
        ref_node = target.find("ref") if target is not None else None
        if ref_node is None:
            return None
        current_id = ref_node.attrib.get("idref")
    return None
|
||
|
||
def _base_type_name(die):
    """Return the DW_AT_name string of a DIE, or "unknown" if it has none.

    The attribute kind is stored in a <type> child element of <attribute>
    (the same layout the other lookups in this file rely on), so the path
    uses a child-text predicate rather than an XML-attribute predicate.
    """
    return die.findtext("attribute[type='DW_AT_name']/value/string") or "unknown"


# Maps a DIE tag to a callable that produces a short type label for a DIE
# with that tag (example table for simple types).
base_types_map = {
    "DW_TAG_base_type": _base_type_name,
    "DW_TAG_structure_type": lambda die: "struct",
    "DW_TAG_union_type": lambda die: "union",
    "DW_TAG_pointer_type": lambda die: "pointer",
    "DW_TAG_array_type": lambda die: "array",
}
|
||
|
||
def get_type_name(type_id):
    """Build a C-like type name for the type referenced by `type_id`.

    The id is first resolved with resolve_type_die(). The result is:
      * "struct NAME" / "union NAME" / "enum NAME", with an "anonymous_*"
        placeholder when the DIE has no DW_AT_name attribute;
      * the DW_AT_name string for base types ("base_type_unknown" when the
        attribute is absent);
      * "<pointee>*" for pointers, "void*" when the pointer has no target
        reference, and "unknown" when the pointee itself is unknown;
      * "<element>[]" for arrays, "array[]" when the element type reference
        is missing;
      * "unknown" for an unresolvable id or any other tag.
    """
    die = resolve_type_die(type_id)
    if die is None:
        return "unknown"

    tag = die.findtext("tag")

    # Tags rendered as "<keyword> <name>": (keyword, placeholder name).
    named_kinds = {
        "DW_TAG_structure_type": ("struct", "anonymous_struct"),
        "DW_TAG_union_type": ("union", "anonymous_union"),
        "DW_TAG_enumeration_type": ("enum", "anonymous_enum"),
    }
    if tag in named_kinds:
        keyword, placeholder = named_kinds[tag]
        name_value = get_attr(die, "DW_AT_name")
        label = placeholder if name_value is None else name_value.findtext("string")
        return f"{keyword} {label}"

    if tag == "DW_TAG_base_type":
        name_value = get_attr(die, "DW_AT_name")
        if name_value is None:
            return "base_type_unknown"
        return name_value.findtext("string")

    if tag in ("DW_TAG_pointer_type", "DW_TAG_array_type"):
        is_pointer = tag == "DW_TAG_pointer_type"
        target = get_attr(die, "DW_AT_type")
        ref_node = target.find("ref") if target is not None else None
        if ref_node is None:
            return "void*" if is_pointer else "array[]"

        inner_name = get_type_name(ref_node.attrib.get("idref"))
        if not is_pointer:
            return f"{inner_name}[]"
        # A pointer to an unknown type stays "unknown" so callers skip it.
        return inner_name if inner_name == "unknown" else inner_name + "*"

    return "unknown"
|
||
|
||
def parse_offset(offset_text):
    """Extract the numeric operand of a "DW_OP_plus_uconst <n>" expression.

    Returns the last whitespace-separated token parsed with int(token, 0)
    when `offset_text` starts with "DW_OP_plus_uconst "; returns 0 for None,
    an empty string, or any other expression.
    """
    prefix = "DW_OP_plus_uconst "
    if not offset_text or not offset_text.startswith(prefix):
        return 0
    operand = offset_text.split()[-1]
    return int(operand, 0)
|
||
|
||
|
||
def get_array_dimensions(array_die):
    """Return the lengths of all dimensions of a DW_TAG_array_type DIE.

    Every DW_TAG_subrange_type child of `array_die` contributes one
    dimension, in document order; its length is DW_AT_upper_bound + 1
    (indices start at 0). An array DIE without subrange children is treated
    as having one dimension of unknown length.

    When exactly one dimension is unknown, it is derived from
    DW_AT_byte_size of the array divided by the element size (and by the
    product of the known dimensions). Dimensions that remain unknown are
    reported as 0.

    If the element type is itself an array type, its dimensions are
    appended recursively, so arrays of arrays are covered as well.

    Returns:
        list[int]: one entry per dimension, outermost first.
    """
    # Resolve the element type once; it is needed both for the size-based
    # fallback and for descending into nested array types.
    element_die = None
    type_value = get_attr(array_die, "DW_AT_type")
    ref_node = type_value.find("ref") if type_value is not None else None
    if ref_node is not None:
        element_die = resolve_type_die(ref_node.attrib.get("idref"))

    dims = []
    for child in array_die.findall("die"):
        if child.findtext("tag") != "DW_TAG_subrange_type":
            continue
        length = None
        # get_attr() already returns the <value> element, so the constant is
        # its direct <const> child (not "value/const").
        bound = get_attr(child, "DW_AT_upper_bound")
        const_node = bound.find("const") if bound is not None else None
        if const_node is not None and const_node.text:
            length = int(const_node.text, 0) + 1
        dims.append(length)

    if not dims:
        # No subrange information at all: a single dimension, length unknown.
        dims.append(None)

    if dims.count(None) == 1:
        # Derive the one missing length from total size / element size.
        arr_size = get_die_size(array_die)
        elem_size = get_die_size(element_die) if element_die is not None else None
        known_product = 1
        for length in dims:
            if length is not None:
                known_product *= length
        if arr_size is not None and elem_size and known_product:
            dims[dims.index(None)] = arr_size // elem_size // known_product

    # 0 marks a dimension whose length could not be determined.
    dims = [0 if length is None else length for length in dims]

    if element_die is not None and element_die.findtext("tag") == "DW_TAG_array_type":
        dims.extend(get_array_dimensions(element_die))

    return dims
|
||
|
||
|
||
|
||
|
||
|
||
def handle_array_type(member_elem, resolved_type, offset=0):
    """Annotate an output element that describes an array.

    Sets on `member_elem`:
      * "type"  - innermost element type name followed by one "[]" per
                  dimension reported by get_array_dimensions();
      * "size"  - DW_AT_byte_size of the array DIE as a decimal string, when
                  present;
      * "size1", "size2", ... - the length of each dimension;
      * "kind"  - "array".

    When the innermost element type is a structure, its members are added
    below `member_elem` via add_members_recursive(), using `offset` as the
    base offset.

    NOTE(review): add_members_recursive() also writes "size" (as hex of the
    structure size), so for arrays of structures the array size set here is
    overwritten - confirm which value consumers expect.
    NOTE(review): only structure element types are expanded; union element
    types are not - confirm this is intended.
    """
    dims = get_array_dimensions(resolved_type)

    # Walk down through nested array types to the innermost element type.
    element_type_die = resolved_type
    while True:
        type_value = get_attr(element_type_die, "DW_AT_type")
        ref_node = type_value.find("ref") if type_value is not None else None
        if ref_node is None:
            element_type_die = None
            break
        element_type_die = resolve_type_die(ref_node.attrib.get("idref"))
        if element_type_die is None:
            break
        if element_type_die.findtext("tag") != "DW_TAG_array_type":
            break

    if element_type_die is None:
        element_type_name = "unknown"
    else:
        element_type_name = get_type_name(element_type_die.attrib.get("id"))

    # Type string with one "[]" per dimension.
    member_elem.set("type", element_type_name + "[]" * len(dims))

    # Size of the whole array.
    total_size = get_die_size(resolved_type)
    if total_size is not None:
        member_elem.set("size", str(total_size))

    # Per-dimension lengths: size1, size2, ...
    for index, length in enumerate(dims, 1):
        member_elem.set(f"size{index}", str(length))

    member_elem.set("kind", "array")

    # Arrays of structures: describe the structure layout as child members.
    is_struct_element = (
        element_type_die is not None
        and element_type_die.findtext("tag") == "DW_TAG_structure_type"
    )
    if is_struct_element:
        add_members_recursive(member_elem, element_type_die, offset)
|
||
|
||
|
||
|
||
|
||
def add_members_recursive(parent_elem, struct_die, base_offset=0):
    """Append <member> elements for every member of a structure or union.

    Args:
        parent_elem: output element that receives the <member> children; its
            "size" attribute is set to hex(DW_AT_byte_size) of `struct_die`
            when that size is available.
        struct_die: DIE of the structure or union to describe.
        base_offset: offset added to each member's own offset, so nested
            members carry offsets relative to the outermost object.

    Each emitted <member> has "name", "offset" (hex) and "type". Members of
    a union additionally get kind="union". Array members are completed by
    handle_array_type(); structure and union members are expanded
    recursively.

    Members are skipped when they lack a name, a DW_AT_data_member_location,
    or a usable DW_AT_type reference, or when their type name resolves to
    "unknown".
    """
    is_union = struct_die.findtext("tag") == "DW_TAG_union_type"

    # Size of the structure/union itself.
    size = get_die_size(struct_die)
    if size is not None:
        parent_elem.set("size", hex(size))

    for member in struct_die.findall("die"):
        if member.findtext("tag") != "DW_TAG_member":
            continue

        name_attr = get_attr(member, "DW_AT_name")
        offset_attr = get_attr(member, "DW_AT_data_member_location")
        type_attr = get_attr(member, "DW_AT_type")
        if name_attr is None or offset_attr is None or type_attr is None:
            continue

        # Guard against a name without <string> and a type value without
        # <ref>: both previously led to an exception instead of a skip.
        name = name_attr.findtext("string")
        type_ref = type_attr.find("ref")
        if name is None or type_ref is None:
            continue

        offset = parse_offset(offset_attr.findtext("block")) + base_offset
        type_id = type_ref.attrib.get("idref")

        type_name = get_type_name(type_id)
        if type_name == "unknown":
            continue
        resolved_type = resolve_type_die(type_id)

        member_elem = ET.SubElement(
            parent_elem, "member", name=name, offset=hex(offset), type=type_name
        )
        if is_union:
            member_elem.set("kind", "union")

        if resolved_type is None:
            continue
        subtag = resolved_type.findtext("tag")
        if subtag == "DW_TAG_array_type":
            # Arrays: type string, sizes and kind are filled in by the helper.
            handle_array_type(member_elem, resolved_type, offset)
        elif subtag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            # Nested aggregates: describe their members below this one.
            add_members_recursive(member_elem, resolved_type, offset)
|
||
|
||
|
||
# Address window [PERIPHERAL_START, PERIPHERAL_END): variables located here
# are treated as peripheral memory and left out of the output.
PERIPHERAL_START = 0x800
PERIPHERAL_END = 0x8000

# Build the simplified document: one <variable> per DW_TAG_variable DIE that
# has a name, a fixed address (DW_OP_addr) and a resolvable type.
output_root = ET.Element("variables")
for die in root.iter("die"):
    if die.findtext("tag") != "DW_TAG_variable":
        continue

    name_attr = get_attr(die, "DW_AT_name")
    addr_attr = get_attr(die, "DW_AT_location")
    type_attr = get_attr(die, "DW_AT_type")
    if name_attr is None or addr_attr is None or type_attr is None:
        continue

    # Skip variables without a usable name and those with '$' in the name.
    name = name_attr.findtext("string")
    if name is None or "$" in name:
        continue

    # Only variables with a plain address expression are exported.
    addr_text = addr_attr.findtext("block")
    if not addr_text or not addr_text.startswith("DW_OP_addr "):
        continue
    addr = int(addr_text.split()[-1], 0)

    # Skip variables placed in peripheral memory before doing any type work.
    if PERIPHERAL_START <= addr < PERIPHERAL_END:
        continue

    # A DW_AT_type value without a <ref> cannot be resolved; skip it rather
    # than fail on the missing element.
    type_ref = type_attr.find("ref")
    if type_ref is None:
        continue
    type_id = type_ref.attrib.get("idref")

    # Types that cannot be named (e.g. DW_TAG_subroutine_type) are skipped.
    type_name = get_type_name(type_id)
    if type_name == "unknown":
        continue
    resolved_type = resolve_type_die(type_id)

    var_elem = ET.SubElement(
        output_root, "variable", name=name, address=hex(addr), type=type_name
    )
    if resolved_type is None:
        continue

    tag = resolved_type.findtext("tag")
    if tag == "DW_TAG_array_type":
        handle_array_type(var_elem, resolved_type)
    elif tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
        add_members_recursive(var_elem, resolved_type)
|
||
|
||
|
||
# Read the time stamp from the info file (None when the file has no
# "Time Stamp:" line, which yields an empty <timestamp/> element).
timestamp = extract_timestamp(info_path)

# Put <timestamp> in front of all <variable> elements.
timestamp_elem = ET.Element("timestamp")
timestamp_elem.text = timestamp
output_root.insert(0, timestamp_elem)

# Pretty-print by round-tripping the serialized tree through minidom.
rough_string = ET.tostring(output_root, encoding="utf-8")
reparsed = xml.dom.minidom.parseString(rough_string)
pretty_xml = reparsed.toprettyxml(indent=" ")

with open(output_path, "w", encoding="utf-8") as f:
    f.write(pretty_xml)

# The input files are consumed by this step and removed afterwards. Never
# remove a path that is also the output file, otherwise the result that was
# just written would be deleted again.
written_path = os.path.normcase(os.path.realpath(output_path))
for consumed_path in (input_path, info_path):
    if os.path.normcase(os.path.realpath(consumed_path)) != written_path:
        os.remove(consumed_path)

print(f"Simplified and formatted XML saved to: {output_path}")
|