From ca81ce72b4d12494424d1813c6437035c1f89a8c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:35 +0100 Subject: scripts/dump-guest-memory.py: Move constants to the top The constants bloated the class definition and were therefore moved to the top. Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-2-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 126 +++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 63 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 08796fff8c..e49c835185 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -17,6 +17,55 @@ import struct +TARGET_PAGE_SIZE = 0x1000 +TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 + +# Various ELF constants +EM_X86_64 = 62 # AMD x86-64 target machine +ELFDATA2LSB = 1 # little endian +ELFCLASS64 = 2 +ELFMAG = "\x7FELF" +EV_CURRENT = 1 +ET_CORE = 4 +PT_LOAD = 1 +PT_NOTE = 4 + +# Special value for e_phnum. This indicates that the real number of +# program headers is too large to fit into e_phnum. Instead the real +# value is in the field sh_info of section 0. +PN_XNUM = 0xFFFF + +# Format strings for packing and header size calculation. +ELF64_EHDR = ("4s" # e_ident/magic + "B" # e_ident/class + "B" # e_ident/data + "B" # e_ident/version + "B" # e_ident/osabi + "8s" # e_ident/pad + "H" # e_type + "H" # e_machine + "I" # e_version + "Q" # e_entry + "Q" # e_phoff + "Q" # e_shoff + "I" # e_flags + "H" # e_ehsize + "H" # e_phentsize + "H" # e_phnum + "H" # e_shentsize + "H" # e_shnum + "H" # e_shstrndx + ) +ELF64_PHDR = ("I" # p_type + "I" # p_flags + "Q" # p_offset + "Q" # p_vaddr + "Q" # p_paddr + "Q" # p_filesz + "Q" # p_memsz + "Q" # p_align + ) + class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. 
@@ -47,62 +96,13 @@ deliberately called abort(), or it was dumped in response to a signal at a halfway fortunate point, then its coredump should be in reasonable shape and this command should mostly work.""" - TARGET_PAGE_SIZE = 0x1000 - TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 - - # Various ELF constants - EM_X86_64 = 62 # AMD x86-64 target machine - ELFDATA2LSB = 1 # little endian - ELFCLASS64 = 2 - ELFMAG = "\x7FELF" - EV_CURRENT = 1 - ET_CORE = 4 - PT_LOAD = 1 - PT_NOTE = 4 - - # Special value for e_phnum. This indicates that the real number of - # program headers is too large to fit into e_phnum. Instead the real - # value is in the field sh_info of section 0. - PN_XNUM = 0xFFFF - - # Format strings for packing and header size calculation. - ELF64_EHDR = ("4s" # e_ident/magic - "B" # e_ident/class - "B" # e_ident/data - "B" # e_ident/version - "B" # e_ident/osabi - "8s" # e_ident/pad - "H" # e_type - "H" # e_machine - "I" # e_version - "Q" # e_entry - "Q" # e_phoff - "Q" # e_shoff - "I" # e_flags - "H" # e_ehsize - "H" # e_phentsize - "H" # e_phnum - "H" # e_shentsize - "H" # e_shnum - "H" # e_shstrndx - ) - ELF64_PHDR = ("I" # p_type - "I" # p_flags - "Q" # p_offset - "Q" # p_vaddr - "Q" # p_paddr - "Q" # p_filesz - "Q" # p_memsz - "Q" # p_align - ) - def __init__(self): super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) self.uintptr_t = gdb.lookup_type("uintptr_t") - self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR) - self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR) + self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) + self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) def int128_get64(self, val): assert (val["hi"] == 0) @@ -130,7 +130,7 @@ shape and this command should mostly work.""" if (mr["alias"] != 0): return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + mr["alias_offset"]) - return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK) + return 
self.qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) def guest_phys_blocks_init(self): self.guest_phys_blocks = [] @@ -198,21 +198,21 @@ shape and this command should mostly work.""" # most common values. This also means that instruction pointer # etc. will be bogus in the dump, but at least the RAM contents # should be valid. - self.dump_info = {"d_machine": self.EM_X86_64, - "d_endian" : self.ELFDATA2LSB, - "d_class" : self.ELFCLASS64} + self.dump_info = {"d_machine": EM_X86_64, + "d_endian" : ELFDATA2LSB, + "d_class" : ELFCLASS64} def encode_elf64_ehdr_le(self): return self.elf64_ehdr_le.pack( - self.ELFMAG, # e_ident/magic + ELFMAG, # e_ident/magic self.dump_info["d_class"], # e_ident/class self.dump_info["d_endian"], # e_ident/data - self.EV_CURRENT, # e_ident/version + EV_CURRENT, # e_ident/version 0, # e_ident/osabi "", # e_ident/pad - self.ET_CORE, # e_type + ET_CORE, # e_type self.dump_info["d_machine"], # e_machine - self.EV_CURRENT, # e_version + EV_CURRENT, # e_version 0, # e_entry self.elf64_ehdr_le.size, # e_phoff 0, # e_shoff @@ -226,7 +226,7 @@ shape and this command should mostly work.""" ) def encode_elf64_note_le(self): - return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type + return self.elf64_phdr_le.pack(PT_NOTE, # p_type 0, # p_flags (self.memory_offset - len(self.note)), # p_offset @@ -238,7 +238,7 @@ shape and this command should mostly work.""" ) def encode_elf64_load_le(self, offset, start_hwaddr, range_size): - return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type + return self.elf64_phdr_le.pack(PT_LOAD, # p_type 0, # p_flags offset, # p_offset 0, # p_vaddr @@ -276,7 +276,7 @@ shape and this command should mostly work.""" # We should never reach PN_XNUM for paging=false dumps: there's # just a handful of discontiguous ranges after merging. 
self.phdr_num += len(self.guest_phys_blocks) - assert (self.phdr_num < self.PN_XNUM) + assert (self.phdr_num < PN_XNUM) # Calculate the ELF file offset where the memory dump commences: # @@ -312,7 +312,7 @@ shape and this command should mostly work.""" print ("dumping range at %016x for length %016x" % (cur.cast(self.uintptr_t), left)) while (left > 0): - chunk_size = min(self.TARGET_PAGE_SIZE, left) + chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) vmcore.write(chunk) cur += chunk_size -- cgit v1.2.3-55-g7522 From 47890203842de8b29716bdffb406ca851e70829d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:36 +0100 Subject: scripts/dump-guest-memory.py: Make methods functions The functions dealing with qemu components rarely used parts of the class, so they were moved out of the class. As the uintptr_t variable is needed both within and outside the class, it was made a constant and moved to the top. Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-3-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 184 ++++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 91 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index e49c835185..d0b927a2bc 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -17,6 +17,8 @@ import struct +UINTPTR_T = gdb.lookup_type("uintptr_t") + TARGET_PAGE_SIZE = 0x1000 TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 @@ -66,6 +68,94 @@ ELF64_PHDR = ("I" # p_type "Q" # p_align ) +def int128_get64(val): + assert (val["hi"] == 0) + return val["lo"] + +def qlist_foreach(head, field_str): + var_p = head["lh_first"] + while (var_p != 0): + var = var_p.dereference() + yield var + var_p = var[field_str]["le_next"] + +def qemu_get_ram_block(ram_addr): + ram_blocks = 
gdb.parse_and_eval("ram_list.blocks") + for block in qlist_foreach(ram_blocks, "next"): + if (ram_addr - block["offset"] < block["used_length"]): + return block + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + +def qemu_get_ram_ptr(ram_addr): + block = qemu_get_ram_block(ram_addr) + return block["host"] + (ram_addr - block["offset"]) + +def memory_region_get_ram_ptr(mr): + if (mr["alias"] != 0): + return (memory_region_get_ram_ptr(mr["alias"].dereference()) + + mr["alias_offset"]) + return qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) + +def get_guest_phys_blocks(): + guest_phys_blocks = [] + print "guest RAM blocks:" + print ("target_start target_end host_addr message " + "count") + print ("---------------- ---------------- ---------------- ------- " + "-----") + + current_map_p = gdb.parse_and_eval("address_space_memory.current_map") + current_map = current_map_p.dereference() + for cur in range(current_map["nr"]): + flat_range = (current_map["ranges"] + cur).dereference() + mr = flat_range["mr"].dereference() + + # we only care about RAM + if (not mr["ram"]): + continue + + section_size = int128_get64(flat_range["addr"]["size"]) + target_start = int128_get64(flat_range["addr"]["start"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(mr) + + flat_range["offset_in_region"]) + predecessor = None + + # find continuity in guest physical address space + if (len(guest_phys_blocks) > 0): + predecessor = guest_phys_blocks[-1] + predecessor_size = (predecessor["target_end"] - + predecessor["target_start"]) + + # the memory API guarantees monotonically increasing + # traversal + assert (predecessor["target_end"] <= target_start) + + # we want continuity in both guest-physical and + # host-virtual memory + if (predecessor["target_end"] < target_start or + predecessor["host_addr"] + predecessor_size != host_addr): + predecessor = None + + if (predecessor is None): + # isolated mapping, add it to the list + 
guest_phys_blocks.append({"target_start": target_start, + "target_end" : target_end, + "host_addr" : host_addr}) + message = "added" + else: + # expand predecessor until @target_end; predecessor's + # start doesn't change + predecessor["target_end"] = target_end + message = "joined" + + print ("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) + + return guest_phys_blocks + + class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. @@ -100,96 +190,9 @@ shape and this command should mostly work.""" super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) - self.uintptr_t = gdb.lookup_type("uintptr_t") self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) - - def int128_get64(self, val): - assert (val["hi"] == 0) - return val["lo"] - - def qlist_foreach(self, head, field_str): - var_p = head["lh_first"] - while (var_p != 0): - var = var_p.dereference() - yield var - var_p = var[field_str]["le_next"] - - def qemu_get_ram_block(self, ram_addr): - ram_blocks = gdb.parse_and_eval("ram_list.blocks") - for block in self.qlist_foreach(ram_blocks, "next"): - if (ram_addr - block["offset"] < block["used_length"]): - return block - raise gdb.GdbError("Bad ram offset %x" % ram_addr) - - def qemu_get_ram_ptr(self, ram_addr): - block = self.qemu_get_ram_block(ram_addr) - return block["host"] + (ram_addr - block["offset"]) - - def memory_region_get_ram_ptr(self, mr): - if (mr["alias"] != 0): - return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + - mr["alias_offset"]) - return self.qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) - - def guest_phys_blocks_init(self): - self.guest_phys_blocks = [] - - def guest_phys_blocks_append(self): - print "guest RAM blocks:" - print ("target_start target_end host_addr message " - "count") - print ("---------------- 
---------------- ---------------- ------- " - "-----") - - current_map_p = gdb.parse_and_eval("address_space_memory.current_map") - current_map = current_map_p.dereference() - for cur in range(current_map["nr"]): - flat_range = (current_map["ranges"] + cur).dereference() - mr = flat_range["mr"].dereference() - - # we only care about RAM - if (not mr["ram"]): - continue - - section_size = self.int128_get64(flat_range["addr"]["size"]) - target_start = self.int128_get64(flat_range["addr"]["start"]) - target_end = target_start + section_size - host_addr = (self.memory_region_get_ram_ptr(mr) + - flat_range["offset_in_region"]) - predecessor = None - - # find continuity in guest physical address space - if (len(self.guest_phys_blocks) > 0): - predecessor = self.guest_phys_blocks[-1] - predecessor_size = (predecessor["target_end"] - - predecessor["target_start"]) - - # the memory API guarantees monotonically increasing - # traversal - assert (predecessor["target_end"] <= target_start) - - # we want continuity in both guest-physical and - # host-virtual memory - if (predecessor["target_end"] < target_start or - predecessor["host_addr"] + predecessor_size != host_addr): - predecessor = None - - if (predecessor is None): - # isolated mapping, add it to the list - self.guest_phys_blocks.append({"target_start": target_start, - "target_end" : target_end, - "host_addr" : host_addr}) - message = "added" - else: - # expand predecessor until @target_end; predecessor's - # start doesn't change - predecessor["target_end"] = target_end - message = "joined" - - print ("%016x %016x %016x %-7s %5u" % - (target_start, target_end, host_addr.cast(self.uintptr_t), - message, len(self.guest_phys_blocks))) + self.guest_phys_blocks = None def cpu_get_dump_info(self): # We can't synchronize the registers with KVM post-mortem, and @@ -263,8 +266,7 @@ shape and this command should mostly work.""" len(name) + 1, len(desc), type, name, desc) def dump_init(self): - self.guest_phys_blocks_init() - 
self.guest_phys_blocks_append() + self.guest_phys_blocks = get_guest_phys_blocks() self.cpu_get_dump_info() # we have no way to retrieve the VCPU status from KVM # post-mortem @@ -310,7 +312,7 @@ shape and this command should mostly work.""" cur = block["host_addr"] left = block["target_end"] - block["target_start"] print ("dumping range at %016x for length %016x" % - (cur.cast(self.uintptr_t), left)) + (cur.cast(UINTPTR_T), left)) while (left > 0): chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) -- cgit v1.2.3-55-g7522 From 7cb1089d5fbd7b2d9497f111ce948edef41df32d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:37 +0100 Subject: scripts/dump-guest-memory.py: Improve python 3 compatibility This commit does not make the script python 3 compatible, it is a preparation that fixes the easy and common incompatibilities. Print is a function in python 3 and therefore needs braces around its arguments. Range does not cast a gdb.Value object to int in python 3, we have to do it ourselves. 
Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-4-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index d0b927a2bc..bb4ca8e3d4 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -98,15 +98,19 @@ def memory_region_get_ram_ptr(mr): def get_guest_phys_blocks(): guest_phys_blocks = [] - print "guest RAM blocks:" - print ("target_start target_end host_addr message " - "count") - print ("---------------- ---------------- ---------------- ------- " - "-----") + print("guest RAM blocks:") + print("target_start target_end host_addr message " + "count") + print("---------------- ---------------- ---------------- ------- " + "-----") current_map_p = gdb.parse_and_eval("address_space_memory.current_map") current_map = current_map_p.dereference() - for cur in range(current_map["nr"]): + + # Conversion to int is needed for python 3 + # compatibility. Otherwise range doesn't cast the value itself and + # breaks. 
+ for cur in range(int(current_map["nr"])): flat_range = (current_map["ranges"] + cur).dereference() mr = flat_range["mr"].dereference() @@ -149,9 +153,9 @@ def get_guest_phys_blocks(): predecessor["target_end"] = target_end message = "joined" - print ("%016x %016x %016x %-7s %5u" % - (target_start, target_end, host_addr.cast(UINTPTR_T), - message, len(guest_phys_blocks))) + print("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) return guest_phys_blocks @@ -311,8 +315,8 @@ shape and this command should mostly work.""" for block in self.guest_phys_blocks: cur = block["host_addr"] left = block["target_end"] - block["target_start"] - print ("dumping range at %016x for length %016x" % - (cur.cast(UINTPTR_T), left)) + print("dumping range at %016x for length %016x" % + (cur.cast(UINTPTR_T), left)) while (left > 0): chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) -- cgit v1.2.3-55-g7522 From 6782c0e785a0ba48cd96d99f2402cb87af027d26 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:38 +0100 Subject: scripts/dump-guest-memory.py: Cleanup functions Increase readability by adding newlines and comments, as well as removing wrong whitespaces and C style braces around conditionals and loops. 
Reviewed-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-5-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 75 +++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 25 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index bb4ca8e3d4..2cf73659a1 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -69,35 +69,60 @@ ELF64_PHDR = ("I" # p_type ) def int128_get64(val): - assert (val["hi"] == 0) + """Returns low 64bit part of Int128 struct.""" + + assert val["hi"] == 0 return val["lo"] + def qlist_foreach(head, field_str): + """Generator for qlists.""" + var_p = head["lh_first"] - while (var_p != 0): + while var_p != 0: var = var_p.dereference() - yield var var_p = var[field_str]["le_next"] + yield var + def qemu_get_ram_block(ram_addr): + """Returns the RAMBlock struct to which the given address belongs.""" + ram_blocks = gdb.parse_and_eval("ram_list.blocks") + for block in qlist_foreach(ram_blocks, "next"): - if (ram_addr - block["offset"] < block["used_length"]): + if (ram_addr - block["offset"]) < block["used_length"]: return block + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + def qemu_get_ram_ptr(ram_addr): + """Returns qemu vaddr for given guest physical address.""" + block = qemu_get_ram_block(ram_addr) return block["host"] + (ram_addr - block["offset"]) -def memory_region_get_ram_ptr(mr): - if (mr["alias"] != 0): - return (memory_region_get_ram_ptr(mr["alias"].dereference()) + - mr["alias_offset"]) - return qemu_get_ram_ptr(mr["ram_addr"] & TARGET_PAGE_MASK) + +def memory_region_get_ram_ptr(memory_region): + if memory_region["alias"] != 0: + return (memory_region_get_ram_ptr(memory_region["alias"].dereference()) + + memory_region["alias_offset"]) + + return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK) + def 
get_guest_phys_blocks(): + """Returns a list of ram blocks. + + Each block entry contains: + 'target_start': guest block phys start address + 'target_end': guest block phys end address + 'host_addr': qemu vaddr of the block's start + """ + guest_phys_blocks = [] + print("guest RAM blocks:") print("target_start target_end host_addr message " "count") @@ -111,29 +136,29 @@ def get_guest_phys_blocks(): # compatibility. Otherwise range doesn't cast the value itself and # breaks. for cur in range(int(current_map["nr"])): - flat_range = (current_map["ranges"] + cur).dereference() - mr = flat_range["mr"].dereference() + flat_range = (current_map["ranges"] + cur).dereference() + memory_region = flat_range["mr"].dereference() # we only care about RAM - if (not mr["ram"]): + if not memory_region["ram"]: continue section_size = int128_get64(flat_range["addr"]["size"]) target_start = int128_get64(flat_range["addr"]["start"]) - target_end = target_start + section_size - host_addr = (memory_region_get_ram_ptr(mr) + - flat_range["offset_in_region"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(memory_region) + + flat_range["offset_in_region"]) predecessor = None # find continuity in guest physical address space - if (len(guest_phys_blocks) > 0): + if len(guest_phys_blocks) > 0: predecessor = guest_phys_blocks[-1] predecessor_size = (predecessor["target_end"] - predecessor["target_start"]) # the memory API guarantees monotonically increasing # traversal - assert (predecessor["target_end"] <= target_start) + assert predecessor["target_end"] <= target_start # we want continuity in both guest-physical and # host-virtual memory @@ -141,11 +166,11 @@ def get_guest_phys_blocks(): predecessor["host_addr"] + predecessor_size != host_addr): predecessor = None - if (predecessor is None): + if predecessor is None: # isolated mapping, add it to the list guest_phys_blocks.append({"target_start": target_start, - "target_end" : target_end, - "host_addr" : 
host_addr}) + "target_end": target_end, + "host_addr": host_addr}) message = "added" else: # expand predecessor until @target_end; predecessor's @@ -282,7 +307,7 @@ shape and this command should mostly work.""" # We should never reach PN_XNUM for paging=false dumps: there's # just a handful of discontiguous ranges after merging. self.phdr_num += len(self.guest_phys_blocks) - assert (self.phdr_num < PN_XNUM) + assert self.phdr_num < PN_XNUM # Calculate the ELF file offset where the memory dump commences: # @@ -313,15 +338,15 @@ shape and this command should mostly work.""" def dump_iterate(self, vmcore): qemu_core = gdb.inferiors()[0] for block in self.guest_phys_blocks: - cur = block["host_addr"] + cur = block["host_addr"] left = block["target_end"] - block["target_start"] print("dumping range at %016x for length %016x" % (cur.cast(UINTPTR_T), left)) - while (left > 0): + while left > 0: chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) vmcore.write(chunk) - cur += chunk_size + cur += chunk_size left -= chunk_size def create_vmcore(self, filename): @@ -336,7 +361,7 @@ shape and this command should mostly work.""" self.dont_repeat() argv = gdb.string_to_argv(args) - if (len(argv) != 1): + if len(argv) != 1: raise gdb.GdbError("usage: dump-guest-memory FILE") self.dump_init() -- cgit v1.2.3-55-g7522 From 368e3adc8928b2786939a25a336527f83f18e926 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:39 +0100 Subject: scripts/dump-guest-memory.py: Introduce multi-arch support By modelling the ELF with ctypes we not only gain full python 3 support but can also create dumps for different architectures more easily. 
Tested-by: Andrew Jones Acked-by: Laszlo Ersek Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-6-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 492 ++++++++++++++++++++++++++++--------------- 1 file changed, 325 insertions(+), 167 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 2cf73659a1..7acce654bc 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -6,6 +6,7 @@ # # Authors: # Laszlo Ersek +# Janosch Frank # # This work is licensed under the terms of the GNU GPL, version 2 or later. See # the COPYING file in the top-level directory. @@ -15,58 +16,303 @@ # "help data" summary), and it should match how other help texts look in # gdb. -import struct +import ctypes UINTPTR_T = gdb.lookup_type("uintptr_t") TARGET_PAGE_SIZE = 0x1000 TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 -# Various ELF constants -EM_X86_64 = 62 # AMD x86-64 target machine -ELFDATA2LSB = 1 # little endian -ELFCLASS64 = 2 -ELFMAG = "\x7FELF" -EV_CURRENT = 1 -ET_CORE = 4 -PT_LOAD = 1 -PT_NOTE = 4 - # Special value for e_phnum. This indicates that the real number of # program headers is too large to fit into e_phnum. Instead the real # value is in the field sh_info of section 0. PN_XNUM = 0xFFFF -# Format strings for packing and header size calculation. 
-ELF64_EHDR = ("4s" # e_ident/magic - "B" # e_ident/class - "B" # e_ident/data - "B" # e_ident/version - "B" # e_ident/osabi - "8s" # e_ident/pad - "H" # e_type - "H" # e_machine - "I" # e_version - "Q" # e_entry - "Q" # e_phoff - "Q" # e_shoff - "I" # e_flags - "H" # e_ehsize - "H" # e_phentsize - "H" # e_phnum - "H" # e_shentsize - "H" # e_shnum - "H" # e_shstrndx - ) -ELF64_PHDR = ("I" # p_type - "I" # p_flags - "Q" # p_offset - "Q" # p_vaddr - "Q" # p_paddr - "Q" # p_filesz - "Q" # p_memsz - "Q" # p_align - ) +EV_CURRENT = 1 + +ELFCLASS32 = 1 +ELFCLASS64 = 2 + +ELFDATA2LSB = 1 +ELFDATA2MSB = 2 + +ET_CORE = 4 + +PT_LOAD = 1 +PT_NOTE = 4 + +EM_386 = 3 +EM_PPC = 20 +EM_PPC64 = 21 +EM_S390 = 22 +EM_AARCH = 183 +EM_X86_64 = 62 + +class ELF(object): + """Representation of a ELF file.""" + + def __init__(self, arch): + self.ehdr = None + self.notes = [] + self.segments = [] + self.notes_size = 0 + self.endianess = None + self.elfclass = ELFCLASS64 + + if arch == 'aarch64-le': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS64 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'aarch64-be': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'X86_64': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_X86_64 + + elif arch == '386': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS32 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_386 + + elif arch == 's390': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_S390 + + elif arch == 'ppc64-le': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + elif arch == 'ppc64-be': + self.endianess = ELFDATA2MSB + self.ehdr = 
get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + else: + raise gdb.GdbError("No valid arch type specified.\n" + "Currently supported types:\n" + "aarch64-be, aarch64-le, X86_64, 386, s390, " + "ppc64-be, ppc64-le") + + self.add_segment(PT_NOTE, 0, 0) + + def add_note(self, n_name, n_desc, n_type): + """Adds a note to the ELF.""" + + note = get_arch_note(self.endianess, len(n_name), len(n_desc)) + note.n_namesz = len(n_name) + 1 + note.n_descsz = len(n_desc) + note.n_name = n_name.encode() + note.n_type = n_type + + # Desc needs to be 4 byte aligned (although the 64bit spec + # specifies 8 byte). When defining n_desc as uint32 it will be + # automatically aligned but we need the memmove to copy the + # string into it. + ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc)) + + self.notes.append(note) + self.segments[0].p_filesz += ctypes.sizeof(note) + self.segments[0].p_memsz += ctypes.sizeof(note) + + def add_segment(self, p_type, p_paddr, p_size): + """Adds a segment to the elf.""" + + phdr = get_arch_phdr(self.endianess, self.elfclass) + phdr.p_type = p_type + phdr.p_paddr = p_paddr + phdr.p_filesz = p_size + phdr.p_memsz = p_size + self.segments.append(phdr) + self.ehdr.e_phnum += 1 + + def to_file(self, elf_file): + """Writes all ELF structures to the the passed file. 
+ + Structure: + Ehdr + Segment 0:PT_NOTE + Segment 1:PT_LOAD + Segment N:PT_LOAD + Note 0..N + Dump contents + """ + elf_file.write(self.ehdr) + off = ctypes.sizeof(self.ehdr) + \ + len(self.segments) * ctypes.sizeof(self.segments[0]) + + for phdr in self.segments: + phdr.p_offset = off + elf_file.write(phdr) + off += phdr.p_filesz + + for note in self.notes: + elf_file.write(note) + + +def get_arch_note(endianess, len_name, len_desc): + """Returns a Note class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + len_name = len_name + 1 + + class Note(superclass): + """Represents an ELF note, includes the content.""" + + _fields_ = [("n_namesz", ctypes.c_uint32), + ("n_descsz", ctypes.c_uint32), + ("n_type", ctypes.c_uint32), + ("n_name", ctypes.c_char * len_name), + ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))] + return Note() + + +class Ident(ctypes.Structure): + """Represents the ELF ident array in the ehdr structure.""" + + _fields_ = [('ei_mag0', ctypes.c_ubyte), + ('ei_mag1', ctypes.c_ubyte), + ('ei_mag2', ctypes.c_ubyte), + ('ei_mag3', ctypes.c_ubyte), + ('ei_class', ctypes.c_ubyte), + ('ei_data', ctypes.c_ubyte), + ('ei_version', ctypes.c_ubyte), + ('ei_osabi', ctypes.c_ubyte), + ('ei_abiversion', ctypes.c_ubyte), + ('ei_pad', ctypes.c_ubyte * 7)] + + def __init__(self, endianess, elfclass): + self.ei_mag0 = 0x7F + self.ei_mag1 = ord('E') + self.ei_mag2 = ord('L') + self.ei_mag3 = ord('F') + self.ei_class = elfclass + self.ei_data = endianess + self.ei_version = EV_CURRENT + + +def get_arch_ehdr(endianess, elfclass): + """Returns a EHDR64 class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class EHDR64(superclass): + """Represents the 64 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', 
ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint64), + ('e_phoff', ctypes.c_uint64), + ('e_shoff', ctypes.c_uint64), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + + class EHDR32(superclass): + """Represents the 32 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint32), + ('e_phoff', ctypes.c_uint32), + ('e_shoff', ctypes.c_uint32), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + # End get_arch_ehdr + if elfclass == ELFCLASS64: + return EHDR64() + else: + return EHDR32() + + +def get_arch_phdr(endianess, elfclass): + """Returns a 32 or 64 bit PHDR class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class PHDR64(superclass): + """Represents 
the 64 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_offset', ctypes.c_uint64), + ('p_vaddr', ctypes.c_uint64), + ('p_paddr', ctypes.c_uint64), + ('p_filesz', ctypes.c_uint64), + ('p_memsz', ctypes.c_uint64), + ('p_align', ctypes.c_uint64)] + + class PHDR32(superclass): + """Represents the 32 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_offset', ctypes.c_uint32), + ('p_vaddr', ctypes.c_uint32), + ('p_paddr', ctypes.c_uint32), + ('p_filesz', ctypes.c_uint32), + ('p_memsz', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_align', ctypes.c_uint32)] + + # End get_arch_phdr + if elfclass == ELFCLASS64: + return PHDR64() + else: + return PHDR32() + def int128_get64(val): """Returns low 64bit part of Int128 struct.""" @@ -188,20 +434,22 @@ def get_guest_phys_blocks(): class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. -The sole argument is FILE, identifying the target file to write the -guest vmcore to. +The two required arguments are FILE and ARCH: +FILE identifies the target file to write the guest vmcore to. +ARCH specifies the architecture for which the core will be generated. This GDB command reimplements the dump-guest-memory QMP command in python, using the representation of guest memory as captured in the qemu coredump. The qemu process that has been dumped must have had the -command line option "-machine dump-guest-core=on". +command line option "-machine dump-guest-core=on" which is the default. For simplicity, the "paging", "begin" and "end" parameters of the QMP command are not supported -- no attempt is made to get the guest's internal paging structures (ie. paging=false is hard-wired), and guest memory is always fully dumped. -Only x86_64 guests are supported. +Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be, +ppc64-le guests are supported. 
The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are not written to the vmcore. Preparing these would require context that is @@ -219,129 +467,39 @@ shape and this command should mostly work.""" super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) - self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR) - self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR) + self.elf = None self.guest_phys_blocks = None - def cpu_get_dump_info(self): - # We can't synchronize the registers with KVM post-mortem, and - # the bits in (first_x86_cpu->env.hflags) seem to be stale; they - # may not reflect long mode for example. Hence just assume the - # most common values. This also means that instruction pointer - # etc. will be bogus in the dump, but at least the RAM contents - # should be valid. - self.dump_info = {"d_machine": EM_X86_64, - "d_endian" : ELFDATA2LSB, - "d_class" : ELFCLASS64} - - def encode_elf64_ehdr_le(self): - return self.elf64_ehdr_le.pack( - ELFMAG, # e_ident/magic - self.dump_info["d_class"], # e_ident/class - self.dump_info["d_endian"], # e_ident/data - EV_CURRENT, # e_ident/version - 0, # e_ident/osabi - "", # e_ident/pad - ET_CORE, # e_type - self.dump_info["d_machine"], # e_machine - EV_CURRENT, # e_version - 0, # e_entry - self.elf64_ehdr_le.size, # e_phoff - 0, # e_shoff - 0, # e_flags - self.elf64_ehdr_le.size, # e_ehsize - self.elf64_phdr_le.size, # e_phentsize - self.phdr_num, # e_phnum - 0, # e_shentsize - 0, # e_shnum - 0 # e_shstrndx - ) - - def encode_elf64_note_le(self): - return self.elf64_phdr_le.pack(PT_NOTE, # p_type - 0, # p_flags - (self.memory_offset - - len(self.note)), # p_offset - 0, # p_vaddr - 0, # p_paddr - len(self.note), # p_filesz - len(self.note), # p_memsz - 0 # p_align - ) - - def encode_elf64_load_le(self, offset, start_hwaddr, range_size): - return self.elf64_phdr_le.pack(PT_LOAD, # p_type - 0, # p_flags - offset, # p_offset - 0, # p_vaddr - start_hwaddr, # 
p_paddr - range_size, # p_filesz - range_size, # p_memsz - 0 # p_align - ) - - def note_init(self, name, desc, type): - # name must include a trailing NUL - namesz = (len(name) + 1 + 3) / 4 * 4 - descsz = (len(desc) + 3) / 4 * 4 - fmt = ("<" # little endian - "I" # n_namesz - "I" # n_descsz - "I" # n_type - "%us" # name - "%us" # desc - % (namesz, descsz)) - self.note = struct.pack(fmt, - len(name) + 1, len(desc), type, name, desc) - - def dump_init(self): - self.guest_phys_blocks = get_guest_phys_blocks() - self.cpu_get_dump_info() - # we have no way to retrieve the VCPU status from KVM - # post-mortem - self.note_init("NONE", "EMPTY", 0) - - # Account for PT_NOTE. - self.phdr_num = 1 - - # We should never reach PN_XNUM for paging=false dumps: there's - # just a handful of discontiguous ranges after merging. - self.phdr_num += len(self.guest_phys_blocks) - assert self.phdr_num < PN_XNUM - - # Calculate the ELF file offset where the memory dump commences: - # - # ELF header - # PT_NOTE - # PT_LOAD: 1 - # PT_LOAD: 2 - # ... - # PT_LOAD: len(self.guest_phys_blocks) - # ELF note - # memory dump - self.memory_offset = (self.elf64_ehdr_le.size + - self.elf64_phdr_le.size * self.phdr_num + - len(self.note)) - - def dump_begin(self, vmcore): - vmcore.write(self.encode_elf64_ehdr_le()) - vmcore.write(self.encode_elf64_note_le()) - running = self.memory_offset + def dump_init(self, vmcore): + """Prepares and writes ELF structures to core file.""" + + # Needed to make crash happy, data for more useful notes is + # not available in a qemu core. + self.elf.add_note("NONE", "EMPTY", 0) + + # We should never reach PN_XNUM for paging=false dumps, + # there's just a handful of discontiguous ranges after + # merging. + # The constant is needed to account for the PT_NOTE segment. 
+ phdr_num = len(self.guest_phys_blocks) + 1 + assert phdr_num < PN_XNUM + for block in self.guest_phys_blocks: - range_size = block["target_end"] - block["target_start"] - vmcore.write(self.encode_elf64_load_le(running, - block["target_start"], - range_size)) - running += range_size - vmcore.write(self.note) + block_size = block["target_end"] - block["target_start"] + self.elf.add_segment(PT_LOAD, block["target_start"], block_size) + + self.elf.to_file(vmcore) def dump_iterate(self, vmcore): + """Writes guest core to file.""" + qemu_core = gdb.inferiors()[0] for block in self.guest_phys_blocks: cur = block["host_addr"] left = block["target_end"] - block["target_start"] print("dumping range at %016x for length %016x" % (cur.cast(UINTPTR_T), left)) + while left > 0: chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) @@ -349,22 +507,22 @@ shape and this command should mostly work.""" cur += chunk_size left -= chunk_size - def create_vmcore(self, filename): - vmcore = open(filename, "wb") - self.dump_begin(vmcore) - self.dump_iterate(vmcore) - vmcore.close() - def invoke(self, args, from_tty): + """Handles command invocation from gdb.""" + # Unwittingly pressing the Enter key after the command should # not dump the same multi-gig coredump to the same file. 
self.dont_repeat() argv = gdb.string_to_argv(args) - if len(argv) != 1: - raise gdb.GdbError("usage: dump-guest-memory FILE") + if len(argv) != 2: + raise gdb.GdbError("usage: dump-guest-memory FILE ARCH") + + self.elf = ELF(argv[1]) + self.guest_phys_blocks = get_guest_phys_blocks() - self.dump_init() - self.create_vmcore(argv[0]) + with open(argv[0], "wb") as vmcore: + self.dump_init(vmcore) + self.dump_iterate(vmcore) DumpGuestMemory() -- cgit v1.2.3-55-g7522 From 28fbf8f67b078f738e790f3c3a56aeab2c0ea5d6 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 22 Jan 2016 13:08:40 +0100 Subject: scripts/dump-guest-memory.py: Fix module docstring The leading comment block of the module is changed into a multi-line (triple-quoted) module docstring to comply with PEP 257. The comment about the class docstring that gets used by gdb to print the help is moved to the location of that docstring. Signed-off-by: Janosch Frank Message-Id: <1453464520-3882-7-git-send-email-frankja@linux.vnet.ibm.com> Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'scripts/dump-guest-memory.py') diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 7acce654bc..f274bf80fa 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -1,20 +1,17 @@ -# This python script adds a new gdb command, "dump-guest-memory". It -# should be loaded with "source dump-guest-memory.py" at the (gdb) -# prompt. -# -# Copyright (C) 2013, Red Hat, Inc. -# -# Authors: -# Laszlo Ersek -# Janosch Frank -# -# This work is licensed under the terms of the GNU GPL, version 2 or later. See -# the COPYING file in the top-level directory. -# -# The leading docstring doesn't have idiomatic Python formatting. It is -# printed by gdb's "help" command (the first line is printed in the -# "help data" summary), and it should match how other help texts look in -# gdb. 
+""" +This python script adds a new gdb command, "dump-guest-memory". It +should be loaded with "source dump-guest-memory.py" at the (gdb) +prompt. + +Copyright (C) 2013, Red Hat, Inc. + +Authors: + Laszlo Ersek + Janosch Frank + +This work is licensed under the terms of the GNU GPL, version 2 or later. See +the COPYING file in the top-level directory. +""" import ctypes @@ -431,6 +428,10 @@ def get_guest_phys_blocks(): return guest_phys_blocks +# The leading docstring doesn't have idiomatic Python formatting. It is +# printed by gdb's "help" command (the first line is printed in the +# "help data" summary), and it should match how other help texts look in +# gdb. class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. -- cgit v1.2.3-55-g7522