debugVarTool/parse_xml/Src/parse_xml.py
Razvalyaev 502046091c опять кууууча всего:
базово доделаны терминалки до более менее итогового состояния
2025-07-23 17:13:28 +03:00

417 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# pyinstaller --onefile --distpath ./parse_xml --workpath ./parse_xml/build --specpath ./build parse_xml/Src/parse_xml.py
# python -m nuitka --standalone --onefile --output-dir=./parse_xml parse_xml/Src/parse_xml.py
import xml.etree.ElementTree as ET
import xml.dom.minidom
import sys
import os
# Fallback sizes used when a type DIE carries no explicit size attribute;
# keys are keywords looked up in the type name.
# NOTE(review): the target these sizes correspond to is not stated in this
# file (char/short/int all map to 2) - confirm against the toolchain.
base_type_sizes = {
    "char": 2,
    "short": 2,
    "int": 2,
    "long": 4,
    "long long": 8,
    "float": 4,
    "double": 8,
}

# Two positional arguments are mandatory; the output path is optional.
if len(sys.argv) < 3:
    print("Usage: python parse_xml.exe <input.xml> <info.txt> [output.xml]")
    sys.exit(1)

input_path, info_path = sys.argv[1], sys.argv[2]

if len(sys.argv) < 4:
    # No explicit output path: write "simplified.xml" next to the input file.
    input_dir = os.path.dirname(os.path.abspath(input_path))
    output_path = os.path.join(input_dir, "simplified.xml")
else:
    output_path = sys.argv[3]

# Parse the input XML once; every lookup below works on this tree.
tree = ET.parse(input_path)
root = tree.getroot()
def extract_timestamp(info_path):
    """Return the text following the first "Time Stamp:" marker in a file.

    The file is read line by line as UTF-8. For the first line containing
    the marker, the text between that marker and the next occurrence of the
    marker (or the end of the line) is returned with surrounding whitespace
    stripped. Returns None when no line contains the marker.
    """
    marker = "Time Stamp:"
    with open(info_path, "r", encoding="utf-8") as info_file:
        for raw_line in info_file:
            pieces = raw_line.split(marker)
            if len(pieces) < 2:
                # Marker not present on this line.
                continue
            return pieces[1].strip()
    return None
# Index of every <die> element that carries an "id" XML attribute, keyed by
# that id, so type references ("idref") can be resolved without re-walking
# the tree.
die_by_id = {
    node.attrib["id"]: node
    for node in root.iter("die")
    if "id" in node.attrib
}
def get_attr(die, attr_type):
    """Return the <value> element of a DIE attribute, or None.

    Scans the direct <attribute> children of ``die`` and picks the first one
    whose <type> child has text equal to ``attr_type``. The result is that
    attribute's <value> child, which is None if the attribute has no <value>
    or if no attribute matches.
    """
    for candidate in die.findall("attribute"):
        kind_node = candidate.find("type")
        if kind_node is None or kind_node.text != attr_type:
            continue
        return candidate.find("value")
    return None
def get_die_size(die):
    """Return the size of a DIE, or None when it cannot be determined.

    The value of the DIE's DW_AT_byte_size attribute is used when present.
    Otherwise the DIE's DW_AT_name is matched against the keywords in
    ``base_type_sizes`` and the corresponding size is returned.

    Attributes are located the same way as everywhere else in this file: by
    the text of the <type> child of each <attribute>. The previous name
    lookup used an XPath on an XML attribute (``@name``) and a <const> value,
    which does not match that layout, so the keyword fallback never fired.

    Keywords are matched as whole words, longest keyword first, so that
    "unsigned long long" resolves to "long long" rather than "long", and
    names that merely contain a keyword (e.g. "pointer" contains "int")
    do not match.
    """
    name_text = None
    for attr in die.findall("attribute"):
        type_elem = attr.find("type")
        if type_elem is None:
            continue
        if type_elem.text == "DW_AT_byte_size":
            const_elem = attr.find("value/const")
            if const_elem is not None:
                # Base 0 accepts both decimal and 0x-prefixed values.
                return int(const_elem.text, 0)
        elif type_elem.text == "DW_AT_name" and name_text is None:
            name_text = attr.findtext("value/string") or attr.findtext("value/const")

    if name_text is None:
        # Keep accepting the layout the original lookup was written for.
        legacy_elem = die.find("attribute[@name='DW_AT_name']/value/const")
        if legacy_elem is not None:
            name_text = legacy_elem.text
    if not name_text:
        return None

    # Normalise whitespace and pad with spaces so keywords match whole words.
    padded_name = " " + " ".join(name_text.lower().split()) + " "
    for key in sorted(base_type_sizes, key=len, reverse=True):
        if f" {key} " in padded_name:
            return base_type_sizes[key]
    return None
def resolve_type_die(type_id):
    """Return the type DIE for ``type_id``, looking through wrapper types.

    typedef, const, volatile and TI "far" DIEs are followed through their
    DW_AT_type reference until a DIE with any other tag is reached, which is
    returned. Returns None when the id is empty or unknown, when a wrapper
    has no referenced type, or when the chain of references loops.
    """
    transparent_tags = (
        "DW_TAG_volatile_type",
        "DW_TAG_const_type",
        "DW_TAG_typedef",
        "DW_TAG_TI_far_type",
    )
    seen_ids = set()
    current_id = type_id
    while current_id and current_id not in seen_ids:
        seen_ids.add(current_id)
        candidate = die_by_id.get(current_id)
        if candidate is None:
            return None
        if candidate.findtext("tag") not in transparent_tags:
            return candidate
        # Wrapper type: continue with the type it refers to.
        link = get_attr(candidate, "DW_AT_type")
        target = link.find("ref") if link is not None else None
        if target is None:
            return None
        current_id = target.attrib.get("idref")
    return None
def _base_type_label(die):
    """Return the DW_AT_name string of a base-type DIE, or "unknown"."""
    name_node = die.find("attribute[@type='DW_AT_name']/value/string")
    return name_node.text if name_node is not None else "unknown"


# Mapping from DIE tag to a callable producing a short type label (example).
# NOTE(review): nothing in the visible part of this file reads this mapping,
# and the base-type lookup selects on an XML attribute named "type" whereas
# get_attr() matches a <type> child element - confirm before relying on it.
base_types_map = {
    "DW_TAG_base_type": _base_type_label,
    "DW_TAG_structure_type": lambda die: "struct",
    "DW_TAG_union_type": lambda die: "union",
    "DW_TAG_pointer_type": lambda die: "pointer",
    "DW_TAG_array_type": lambda die: "array",
}
def get_type_name(type_id):
    """Return a C-like display name for the type identified by ``type_id``.

    The id is first resolved with resolve_type_die(). The result depends on
    the tag of the resolved DIE:

    * base type: its DW_AT_name string, or "base_type_unknown" without one;
    * struct / union / enum: "struct NAME", "union NAME" or "enum NAME",
      with "anonymous_struct" / "anonymous_union" / "anonymous_enum" as NAME
      when the DIE has no DW_AT_name;
    * pointer: the pointee name followed by "*"; "void*" when the pointer
      references no type; "unknown" when the pointee name is "unknown";
    * array: the element type name followed by "[]", or "array[]" when the
      array references no element type;
    * anything else, or an unresolvable id: "unknown".
    """
    die = resolve_type_die(type_id)
    if die is None:
        return "unknown"

    def target_ref():
        # <ref> element of the DIE's DW_AT_type attribute, if any.
        value = get_attr(die, "DW_AT_type")
        return value.find("ref") if value is not None else None

    def declared_name(fallback):
        # DW_AT_name string of the DIE, or ``fallback`` without the attribute.
        value = get_attr(die, "DW_AT_name")
        return value.findtext("string") if value is not None else fallback

    tag = die.findtext("tag")

    if tag == "DW_TAG_base_type":
        return declared_name("base_type_unknown")

    keyword_by_tag = {
        "DW_TAG_structure_type": "struct",
        "DW_TAG_union_type": "union",
        "DW_TAG_enumeration_type": "enum",
    }
    if tag in keyword_by_tag:
        keyword = keyword_by_tag[tag]
        type_label = declared_name("anonymous_" + keyword)
        return f"{keyword} {type_label}"

    if tag == "DW_TAG_pointer_type":
        ref = target_ref()
        if ref is None:
            return "void*"
        pointee_name = get_type_name(ref.attrib.get("idref"))
        # An unknown pointee stays "unknown" so callers can skip it.
        return pointee_name if pointee_name == "unknown" else pointee_name + "*"

    if tag == "DW_TAG_array_type":
        ref = target_ref()
        if ref is None:
            return "array[]"
        element_name = get_type_name(ref.attrib.get("idref"))
        return f"{element_name}[]"

    return "unknown"
def parse_offset(offset_text):
    """Return the integer operand of a "DW_OP_plus_uconst N" expression.

    The last whitespace-separated token of ``offset_text`` is parsed with
    base auto-detection (decimal or 0x-prefixed). Returns 0 when the text is
    empty/None or does not start with "DW_OP_plus_uconst ".
    """
    prefix = "DW_OP_plus_uconst "
    if not offset_text or not offset_text.startswith(prefix):
        return 0
    last_token = offset_text.split()[-1]
    return int(last_token, 0)
def get_base_type_die(array_die):
    """Follow DW_AT_type from an array DIE down to the first non-array type.

    Each referenced type is resolved with resolve_type_die(). While the
    resolved DIE is itself an array, the walk continues from it; the first
    resolved DIE that is not an array is returned. If a DIE on the way has
    no type reference, or the reference cannot be resolved, the innermost
    array DIE reached so far is returned instead.
    """
    innermost = array_die
    while True:
        link = get_attr(innermost, "DW_AT_type")
        target = link.find("ref") if link is not None else None
        if target is None:
            return innermost
        candidate = resolve_type_die(target.attrib.get("idref"))
        if candidate is None:
            return innermost
        if candidate.findtext("tag") != "DW_TAG_array_type":
            return candidate
        innermost = candidate
def _int_or_none(text):
    """Parse ``text`` as an integer (decimal or 0x-prefixed), or return None."""
    try:
        return int(text, 0)
    except (TypeError, ValueError):
        return None


def _subrange_length(subrange_die):
    """Return the element count described by a DW_TAG_subrange_type DIE, or None.

    DW_AT_upper_bound is tried first (as a <const>, or as a <block> holding a
    "DW_OP_plus_uconst N" expression); the length is the bound plus one.
    DW_AT_count is used as a fallback and taken as the length directly.
    """
    upper = get_attr(subrange_die, "DW_AT_upper_bound")
    if upper is not None:
        bound = None
        const_elem = upper.find("const")
        if const_elem is not None:
            bound = _int_or_none(const_elem.text)
        else:
            block_text = upper.findtext("block")
            if block_text and "DW_OP_plus_uconst" in block_text:
                bound = _int_or_none(block_text.split()[-1])
        if bound is not None:
            return bound + 1

    count = get_attr(subrange_die, "DW_AT_count")
    if count is not None:
        # get_attr() already returns the <value> element, so the constant is
        # a direct child. The previous "value/const" path could never match;
        # it is still tried second for safety.
        const_elem = count.find("const")
        if const_elem is None:
            const_elem = count.find("value/const")
        if const_elem is not None:
            return _int_or_none(const_elem.text)
    return None


def get_array_dimensions(array_die):
    """Return the list of dimension lengths of an array DIE.

    One entry is produced per DW_TAG_subrange_type child, in document order;
    a subrange whose length cannot be determined contributes 0. When the
    array has no subrange children, a single dimension is derived from
    ``array size // element size`` (0 if either size is unavailable). If the
    resolved element type is itself an array, its dimensions are appended.
    """
    dims = []
    for child in array_die.findall("die"):
        if child.findtext("tag") != "DW_TAG_subrange_type":
            continue
        length = _subrange_length(child)
        if length is None:
            print("[DEBUG] No dimension size found for this subrange, defaulting to 0")
            length = 0
        dims.append(length)

    # Resolve the element type once; both fallbacks below need it.
    element_die = None
    element_ref = get_attr(array_die, "DW_AT_type")
    if element_ref is not None and element_ref.find("ref") is not None:
        element_die = resolve_type_die(element_ref.find("ref").attrib.get("idref"))

    if not dims:
        # No subranges: estimate a single dimension from the total size.
        arr_size = get_die_size(array_die)
        elem_size = get_die_size(element_die) if element_die is not None else None
        if arr_size is not None and elem_size:
            dims.append(arr_size // elem_size)
        else:
            dims.append(0)
            print("[DEBUG] Could not calculate dimension size, set 0")

    # Arrays whose element type is another array type contribute that
    # array's dimensions as well.
    if element_die is not None and element_die.findtext("tag") == "DW_TAG_array_type":
        dims.extend(get_array_dimensions(element_die))
    return dims
def handle_array_type(member_elem, resolved_type, offset=0):
    """Annotate ``member_elem`` with the description of an array type.

    Sets these XML attributes on ``member_elem``:

    * "type": the innermost element type name followed by one "[]" per
      dimension;
    * "size": the element size when both the element size and the element
      count are non-zero, otherwise the size of the array DIE itself if
      that is known;
    * "size1", "size2", ...: the length of each dimension;
    * "kind": "array".

    When the innermost element type is a structure, its members are added
    below ``member_elem`` using ``offset`` as their base offset.
    """
    dims = get_array_dimensions(resolved_type)
    base_die = get_base_type_die(resolved_type)

    element_name = "unknown"
    element_size = 0
    if base_die is not None:
        base_id = base_die.attrib.get("id")
        element_name = get_type_name(base_id or "")
        if base_id:
            measured_size = get_die_size(base_die)
            if measured_size is not None:
                element_size = measured_size

    member_elem.set("type", element_name + "[]" * len(dims))

    # Total element count; a zero-length dimension makes the whole array empty.
    element_count = 1
    for extent in dims:
        if extent == 0:
            element_count = 0
            print("[WARN] Dimension size is zero, setting total elements to 0")
            break
        element_count *= extent

    if element_count * element_size:
        member_elem.set("size", str(element_size))
    else:
        fallback_size = get_die_size(resolved_type)
        if fallback_size:
            member_elem.set("size", str(fallback_size))
        else:
            print("[WARN] Could not determine total size for array")

    for position, extent in enumerate(dims, 1):
        member_elem.set(f"size{position}", str(extent))
    member_elem.set("kind", "array")

    if base_die is not None and base_die.findtext("tag") == "DW_TAG_structure_type":
        add_members_recursive(member_elem, base_die, offset)
# Identities of the struct/union DIEs whose members are being expanded
# somewhere up the current call stack. Used to stop the expansion of
# self-referential types (e.g. a struct holding a pointer to its own type).
_dies_being_expanded = set()


def _add_pointee(member_elem, pointer_die):
    """Add a <pointee> child describing the struct/union a pointer refers to."""
    pointee_ref = get_attr(pointer_die, "DW_AT_type")
    if pointee_ref is None or pointee_ref.find("ref") is None:
        return
    pointee_id = pointee_ref.find("ref").attrib.get("idref")
    pointee_die = resolve_type_die(pointee_id)
    if pointee_die is None:
        return
    if pointee_die.findtext("tag") not in ("DW_TAG_structure_type", "DW_TAG_union_type"):
        return
    pointer_elem = ET.SubElement(member_elem, "pointee", type=get_type_name(pointee_id))
    # Members of the pointed-to object are addressed relative to the object
    # itself, so the base offset restarts at 0.
    add_members_recursive(pointer_elem, pointee_die, 0)


def add_members_recursive(parent_elem, struct_die, base_offset=0):
    """Add a <member> child to ``parent_elem`` for each member of a struct/union.

    ``parent_elem`` gets a "size" attribute (hex) when the size of
    ``struct_die`` is known. Every DW_TAG_member child that has a name, a
    data member location and a resolvable type becomes a <member> element
    with "name", "offset" (hex, relative to ``base_offset``) and "type"
    attributes; members of a union additionally get kind="union". Array,
    struct/union and pointer-to-struct/union members are expanded
    recursively.

    A DIE that is already being expanded further up the call stack is not
    expanded again; without this guard a self-referential type such as a
    linked-list node recursed until the interpreter's recursion limit.
    """
    is_union = struct_die.findtext("tag") == "DW_TAG_union_type"
    size = get_die_size(struct_die)
    if size is not None:
        parent_elem.set("size", hex(size))

    die_key = id(struct_die)
    if die_key in _dies_being_expanded:
        return
    _dies_being_expanded.add(die_key)
    try:
        for member in struct_die.findall("die"):
            if member.findtext("tag") != "DW_TAG_member":
                continue
            name_attr = get_attr(member, "DW_AT_name")
            offset_attr = get_attr(member, "DW_AT_data_member_location")
            type_attr = get_attr(member, "DW_AT_type")
            if name_attr is None or offset_attr is None or type_attr is None:
                continue
            type_ref = type_attr.find("ref")
            name = name_attr.findtext("string")
            if type_ref is None or name is None:
                # Malformed member: nothing usable to emit.
                continue
            offset = parse_offset(offset_attr.findtext("block")) + base_offset
            type_id = type_ref.attrib.get("idref")
            resolved_type = resolve_type_die(type_id)
            type_name = get_type_name(type_id)
            if type_name == "unknown":
                continue
            member_elem = ET.SubElement(
                parent_elem, "member", name=name, offset=hex(offset), type=type_name
            )
            if is_union:
                member_elem.set("kind", "union")
            if resolved_type is None:
                continue
            tag = resolved_type.findtext("tag")
            if tag == "DW_TAG_array_type":
                handle_array_type(member_elem, resolved_type, offset)
            elif tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
                add_members_recursive(member_elem, resolved_type, offset)
            elif tag == "DW_TAG_pointer_type":
                _add_pointee(member_elem, resolved_type)
    finally:
        _dies_being_expanded.discard(die_key)
# Build the simplified document: one <variable> element per global variable
# DIE that has a name, a fixed address and a known type.
output_root = ET.Element("variables")

for die in root.iter("die"):
    if die.findtext("tag") != "DW_TAG_variable":
        continue
    name_attr = get_attr(die, "DW_AT_name")
    addr_attr = get_attr(die, "DW_AT_location")
    type_attr = get_attr(die, "DW_AT_type")
    if name_attr is None or addr_attr is None or type_attr is None:
        continue
    name = name_attr.findtext("string")
    # Names containing "$" are skipped; a missing name string is skipped too
    # instead of failing on the membership test.
    if name is None or "$" in name:
        continue
    # Only variables located at an absolute address are exported.
    addr_text = addr_attr.findtext("block")
    if not addr_text or not addr_text.startswith("DW_OP_addr "):
        continue
    addr = int(addr_text.split()[-1], 0)
    # Same guard as the other type lookups in this file: a DW_AT_type value
    # without a <ref> child cannot be resolved.
    type_ref = type_attr.find("ref")
    if type_ref is None:
        continue
    type_id = type_ref.attrib.get("idref")
    resolved_type = resolve_type_die(type_id)
    type_name = get_type_name(type_id)
    # Addresses in [0x800, 0x8000) are excluded.
    # NOTE(review): the reason for this address window is not stated in this
    # file - presumably a target-specific memory region; confirm.
    if 0x800 <= addr < 0x8000 or type_name == "unknown":
        continue
    var_elem = ET.SubElement(output_root, "variable", name=name, address=hex(addr), type=type_name)
    if resolved_type is not None:
        tag = resolved_type.findtext("tag")
        if tag == "DW_TAG_array_type":
            handle_array_type(var_elem, resolved_type)
        elif tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            add_members_recursive(var_elem, resolved_type)

# The timestamp taken from the info file becomes the first child element.
timestamp = extract_timestamp(info_path)
timestamp_elem = ET.Element("timestamp")
timestamp_elem.text = timestamp
output_root.insert(0, timestamp_elem)

rough_string = ET.tostring(output_root, encoding="utf-8")
pretty_xml = xml.dom.minidom.parseString(rough_string).toprettyxml(indent=" ")
with open(output_path, "w", encoding="utf-8") as f:
    f.write(pretty_xml)

# The input files are consumed by this tool and removed afterwards - but
# never the file that was just written, in case the output path was pointed
# at one of the inputs.
written_path = os.path.normcase(os.path.abspath(output_path))
for consumed_path in (input_path, info_path):
    if os.path.normcase(os.path.abspath(consumed_path)) != written_path:
        os.remove(consumed_path)

print(f"Simplified and formatted XML saved to: {output_path}")