summary refs log tree commit diff stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/dump-guest-memory.py 762
-rwxr-xr-x scripts/kvm/kvm_stat 1199
2 files changed, 1159 insertions, 802 deletions
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 08796fff8c..f274bf80fa 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -1,39 +1,456 @@
-# This python script adds a new gdb command, "dump-guest-memory". It
-# should be loaded with "source dump-guest-memory.py" at the (gdb)
-# prompt.
-#
-# Copyright (C) 2013, Red Hat, Inc.
-#
-# Authors:
-# Laszlo Ersek <lersek@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later. See
-# the COPYING file in the top-level directory.
-#
+"""
+This python script adds a new gdb command, "dump-guest-memory". It
+should be loaded with "source dump-guest-memory.py" at the (gdb)
+prompt.
+
+Copyright (C) 2013, Red Hat, Inc.
+
+Authors:
+ Laszlo Ersek <lersek@redhat.com>
+ Janosch Frank <frankja@linux.vnet.ibm.com>
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+"""
+
+import ctypes
+
+UINTPTR_T = gdb.lookup_type("uintptr_t")
+
+TARGET_PAGE_SIZE = 0x1000
+TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
+
+# Special value for e_phnum. This indicates that the real number of
+# program headers is too large to fit into e_phnum. Instead the real
+# value is in the field sh_info of section 0.
+PN_XNUM = 0xFFFF
+
+EV_CURRENT = 1
+
+ELFCLASS32 = 1
+ELFCLASS64 = 2
+
+ELFDATA2LSB = 1
+ELFDATA2MSB = 2
+
+ET_CORE = 4
+
+PT_LOAD = 1
+PT_NOTE = 4
+
+EM_386 = 3
+EM_PPC = 20
+EM_PPC64 = 21
+EM_S390 = 22
+EM_AARCH = 183
+EM_X86_64 = 62
+
+class ELF(object):
+ """Representation of an ELF file."""
+
+ def __init__(self, arch):
+ self.ehdr = None
+ self.notes = []
+ self.segments = []
+ self.notes_size = 0
+ self.endianess = None
+ self.elfclass = ELFCLASS64
+
+ if arch == 'aarch64-le':
+ self.endianess = ELFDATA2LSB
+ self.elfclass = ELFCLASS64
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_AARCH
+
+ elif arch == 'aarch64-be':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_AARCH
+
+ elif arch == 'X86_64':
+ self.endianess = ELFDATA2LSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_X86_64
+
+ elif arch == '386':
+ self.endianess = ELFDATA2LSB
+ self.elfclass = ELFCLASS32
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_386
+
+ elif arch == 's390':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_S390
+
+ elif arch == 'ppc64-le':
+ self.endianess = ELFDATA2LSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_PPC64
+
+ elif arch == 'ppc64-be':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_PPC64
+
+ else:
+ raise gdb.GdbError("No valid arch type specified.\n"
+ "Currently supported types:\n"
+ "aarch64-be, aarch64-le, X86_64, 386, s390, "
+ "ppc64-be, ppc64-le")
+
+ self.add_segment(PT_NOTE, 0, 0)
+
+ def add_note(self, n_name, n_desc, n_type):
+ """Adds a note to the ELF."""
+
+ note = get_arch_note(self.endianess, len(n_name), len(n_desc))
+ note.n_namesz = len(n_name) + 1
+ note.n_descsz = len(n_desc)
+ note.n_name = n_name.encode()
+ note.n_type = n_type
+
+ # Desc needs to be 4 byte aligned (although the 64bit spec
+ # specifies 8 byte). When defining n_desc as uint32 it will be
+ # automatically aligned but we need the memmove to copy the
+ # string into it.
+ ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc))
+
+ self.notes.append(note)
+ self.segments[0].p_filesz += ctypes.sizeof(note)
+ self.segments[0].p_memsz += ctypes.sizeof(note)
+
+ def add_segment(self, p_type, p_paddr, p_size):
+ """Adds a segment to the ELF."""
+
+ phdr = get_arch_phdr(self.endianess, self.elfclass)
+ phdr.p_type = p_type
+ phdr.p_paddr = p_paddr
+ phdr.p_filesz = p_size
+ phdr.p_memsz = p_size
+ self.segments.append(phdr)
+ self.ehdr.e_phnum += 1
+
+ def to_file(self, elf_file):
+ """Writes all ELF structures to the passed file.
+
+ Structure:
+ Ehdr
+ Segment 0:PT_NOTE
+ Segment 1:PT_LOAD
+ Segment N:PT_LOAD
+ Note 0..N
+ Dump contents
+ """
+ elf_file.write(self.ehdr)
+ off = ctypes.sizeof(self.ehdr) + \
+ len(self.segments) * ctypes.sizeof(self.segments[0])
+
+ for phdr in self.segments:
+ phdr.p_offset = off
+ elf_file.write(phdr)
+ off += phdr.p_filesz
+
+ for note in self.notes:
+ elf_file.write(note)
+
+
+def get_arch_note(endianess, len_name, len_desc):
+ """Returns a Note instance with the specified endianness."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ len_name = len_name + 1
+
+ class Note(superclass):
+ """Represents an ELF note, includes the content."""
+
+ _fields_ = [("n_namesz", ctypes.c_uint32),
+ ("n_descsz", ctypes.c_uint32),
+ ("n_type", ctypes.c_uint32),
+ ("n_name", ctypes.c_char * len_name),
+ ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
+ return Note()
+
+
+class Ident(ctypes.Structure):
+ """Represents the ELF ident array in the ehdr structure."""
+
+ _fields_ = [('ei_mag0', ctypes.c_ubyte),
+ ('ei_mag1', ctypes.c_ubyte),
+ ('ei_mag2', ctypes.c_ubyte),
+ ('ei_mag3', ctypes.c_ubyte),
+ ('ei_class', ctypes.c_ubyte),
+ ('ei_data', ctypes.c_ubyte),
+ ('ei_version', ctypes.c_ubyte),
+ ('ei_osabi', ctypes.c_ubyte),
+ ('ei_abiversion', ctypes.c_ubyte),
+ ('ei_pad', ctypes.c_ubyte * 7)]
+
+ def __init__(self, endianess, elfclass):
+ self.ei_mag0 = 0x7F
+ self.ei_mag1 = ord('E')
+ self.ei_mag2 = ord('L')
+ self.ei_mag3 = ord('F')
+ self.ei_class = elfclass
+ self.ei_data = endianess
+ self.ei_version = EV_CURRENT
+
+
+def get_arch_ehdr(endianess, elfclass):
+ """Returns a 32 or 64 bit EHDR instance with the specified endianness."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ class EHDR64(superclass):
+ """Represents the 64 bit ELF header struct."""
+
+ _fields_ = [('e_ident', Ident),
+ ('e_type', ctypes.c_uint16),
+ ('e_machine', ctypes.c_uint16),
+ ('e_version', ctypes.c_uint32),
+ ('e_entry', ctypes.c_uint64),
+ ('e_phoff', ctypes.c_uint64),
+ ('e_shoff', ctypes.c_uint64),
+ ('e_flags', ctypes.c_uint32),
+ ('e_ehsize', ctypes.c_uint16),
+ ('e_phentsize', ctypes.c_uint16),
+ ('e_phnum', ctypes.c_uint16),
+ ('e_shentsize', ctypes.c_uint16),
+ ('e_shnum', ctypes.c_uint16),
+ ('e_shstrndx', ctypes.c_uint16)]
+
+ def __init__(self):
+ super(superclass, self).__init__()
+ self.e_ident = Ident(endianess, elfclass)
+ self.e_type = ET_CORE
+ self.e_version = EV_CURRENT
+ self.e_ehsize = ctypes.sizeof(self)
+ self.e_phoff = ctypes.sizeof(self)
+ self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+ self.e_phnum = 0
+
+
+ class EHDR32(superclass):
+ """Represents the 32 bit ELF header struct."""
+
+ _fields_ = [('e_ident', Ident),
+ ('e_type', ctypes.c_uint16),
+ ('e_machine', ctypes.c_uint16),
+ ('e_version', ctypes.c_uint32),
+ ('e_entry', ctypes.c_uint32),
+ ('e_phoff', ctypes.c_uint32),
+ ('e_shoff', ctypes.c_uint32),
+ ('e_flags', ctypes.c_uint32),
+ ('e_ehsize', ctypes.c_uint16),
+ ('e_phentsize', ctypes.c_uint16),
+ ('e_phnum', ctypes.c_uint16),
+ ('e_shentsize', ctypes.c_uint16),
+ ('e_shnum', ctypes.c_uint16),
+ ('e_shstrndx', ctypes.c_uint16)]
+
+ def __init__(self):
+ super(superclass, self).__init__()
+ self.e_ident = Ident(endianess, elfclass)
+ self.e_type = ET_CORE
+ self.e_version = EV_CURRENT
+ self.e_ehsize = ctypes.sizeof(self)
+ self.e_phoff = ctypes.sizeof(self)
+ self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+ self.e_phnum = 0
+
+ # End get_arch_ehdr
+ if elfclass == ELFCLASS64:
+ return EHDR64()
+ else:
+ return EHDR32()
+
+
+def get_arch_phdr(endianess, elfclass):
+ """Returns a 32 or 64 bit PHDR instance with the specified endianness."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ class PHDR64(superclass):
+ """Represents the 64 bit ELF program header struct."""
+
+ _fields_ = [('p_type', ctypes.c_uint32),
+ ('p_flags', ctypes.c_uint32),
+ ('p_offset', ctypes.c_uint64),
+ ('p_vaddr', ctypes.c_uint64),
+ ('p_paddr', ctypes.c_uint64),
+ ('p_filesz', ctypes.c_uint64),
+ ('p_memsz', ctypes.c_uint64),
+ ('p_align', ctypes.c_uint64)]
+
+ class PHDR32(superclass):
+ """Represents the 32 bit ELF program header struct."""
+
+ _fields_ = [('p_type', ctypes.c_uint32),
+ ('p_offset', ctypes.c_uint32),
+ ('p_vaddr', ctypes.c_uint32),
+ ('p_paddr', ctypes.c_uint32),
+ ('p_filesz', ctypes.c_uint32),
+ ('p_memsz', ctypes.c_uint32),
+ ('p_flags', ctypes.c_uint32),
+ ('p_align', ctypes.c_uint32)]
+
+ # End get_arch_phdr
+ if elfclass == ELFCLASS64:
+ return PHDR64()
+ else:
+ return PHDR32()
+
+
+def int128_get64(val):
+ """Returns low 64bit part of Int128 struct."""
+
+ assert val["hi"] == 0
+ return val["lo"]
+
+
+def qlist_foreach(head, field_str):
+ """Generator for qlists."""
+
+ var_p = head["lh_first"]
+ while var_p != 0:
+ var = var_p.dereference()
+ var_p = var[field_str]["le_next"]
+ yield var
+
+
+def qemu_get_ram_block(ram_addr):
+ """Returns the RAMBlock struct to which the given address belongs."""
+
+ ram_blocks = gdb.parse_and_eval("ram_list.blocks")
+
+ for block in qlist_foreach(ram_blocks, "next"):
+ if (ram_addr - block["offset"]) < block["used_length"]:
+ return block
+
+ raise gdb.GdbError("Bad ram offset %x" % ram_addr)
+
+
+def qemu_get_ram_ptr(ram_addr):
+ """Returns qemu vaddr for given guest physical address."""
+
+ block = qemu_get_ram_block(ram_addr)
+ return block["host"] + (ram_addr - block["offset"])
+
+
+def memory_region_get_ram_ptr(memory_region):
+ if memory_region["alias"] != 0:
+ return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+ + memory_region["alias_offset"])
+
+ return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)
+
+
+def get_guest_phys_blocks():
+ """Returns a list of ram blocks.
+
+ Each block entry contains:
+ 'target_start': guest block phys start address
+ 'target_end': guest block phys end address
+ 'host_addr': qemu vaddr of the block's start
+ """
+
+ guest_phys_blocks = []
+
+ print("guest RAM blocks:")
+ print("target_start target_end host_addr message "
+ "count")
+ print("---------------- ---------------- ---------------- ------- "
+ "-----")
+
+ current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
+ current_map = current_map_p.dereference()
+
+ # Conversion to int is needed for python 3
+ # compatibility. Otherwise range doesn't cast the value itself and
+ # breaks.
+ for cur in range(int(current_map["nr"])):
+ flat_range = (current_map["ranges"] + cur).dereference()
+ memory_region = flat_range["mr"].dereference()
+
+ # we only care about RAM
+ if not memory_region["ram"]:
+ continue
+
+ section_size = int128_get64(flat_range["addr"]["size"])
+ target_start = int128_get64(flat_range["addr"]["start"])
+ target_end = target_start + section_size
+ host_addr = (memory_region_get_ram_ptr(memory_region)
+ + flat_range["offset_in_region"])
+ predecessor = None
+
+ # find continuity in guest physical address space
+ if len(guest_phys_blocks) > 0:
+ predecessor = guest_phys_blocks[-1]
+ predecessor_size = (predecessor["target_end"] -
+ predecessor["target_start"])
+
+ # the memory API guarantees monotonically increasing
+ # traversal
+ assert predecessor["target_end"] <= target_start
+
+ # we want continuity in both guest-physical and
+ # host-virtual memory
+ if (predecessor["target_end"] < target_start or
+ predecessor["host_addr"] + predecessor_size != host_addr):
+ predecessor = None
+
+ if predecessor is None:
+ # isolated mapping, add it to the list
+ guest_phys_blocks.append({"target_start": target_start,
+ "target_end": target_end,
+ "host_addr": host_addr})
+ message = "added"
+ else:
+ # expand predecessor until @target_end; predecessor's
+ # start doesn't change
+ predecessor["target_end"] = target_end
+ message = "joined"
+
+ print("%016x %016x %016x %-7s %5u" %
+ (target_start, target_end, host_addr.cast(UINTPTR_T),
+ message, len(guest_phys_blocks)))
+
+ return guest_phys_blocks
+
+
# The leading docstring doesn't have idiomatic Python formatting. It is
# printed by gdb's "help" command (the first line is printed in the
# "help data" summary), and it should match how other help texts look in
# gdb.
-
-import struct
-
class DumpGuestMemory(gdb.Command):
"""Extract guest vmcore from qemu process coredump.
-The sole argument is FILE, identifying the target file to write the
-guest vmcore to.
+The two required arguments are FILE and ARCH:
+FILE identifies the target file to write the guest vmcore to.
+ARCH specifies the architecture for which the core will be generated.
This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
-command line option "-machine dump-guest-core=on".
+command line option "-machine dump-guest-core=on", which is the default.
For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.
-Only x86_64 guests are supported.
+Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
+ppc64-le guests are supported.
The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
@@ -47,293 +464,66 @@ deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""
- TARGET_PAGE_SIZE = 0x1000
- TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
-
- # Various ELF constants
- EM_X86_64 = 62 # AMD x86-64 target machine
- ELFDATA2LSB = 1 # little endian
- ELFCLASS64 = 2
- ELFMAG = "\x7FELF"
- EV_CURRENT = 1
- ET_CORE = 4
- PT_LOAD = 1
- PT_NOTE = 4
-
- # Special value for e_phnum. This indicates that the real number of
- # program headers is too large to fit into e_phnum. Instead the real
- # value is in the field sh_info of section 0.
- PN_XNUM = 0xFFFF
-
- # Format strings for packing and header size calculation.
- ELF64_EHDR = ("4s" # e_ident/magic
- "B" # e_ident/class
- "B" # e_ident/data
- "B" # e_ident/version
- "B" # e_ident/osabi
- "8s" # e_ident/pad
- "H" # e_type
- "H" # e_machine
- "I" # e_version
- "Q" # e_entry
- "Q" # e_phoff
- "Q" # e_shoff
- "I" # e_flags
- "H" # e_ehsize
- "H" # e_phentsize
- "H" # e_phnum
- "H" # e_shentsize
- "H" # e_shnum
- "H" # e_shstrndx
- )
- ELF64_PHDR = ("I" # p_type
- "I" # p_flags
- "Q" # p_offset
- "Q" # p_vaddr
- "Q" # p_paddr
- "Q" # p_filesz
- "Q" # p_memsz
- "Q" # p_align
- )
-
def __init__(self):
super(DumpGuestMemory, self).__init__("dump-guest-memory",
gdb.COMMAND_DATA,
gdb.COMPLETE_FILENAME)
- self.uintptr_t = gdb.lookup_type("uintptr_t")
- self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR)
- self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR)
-
- def int128_get64(self, val):
- assert (val["hi"] == 0)
- return val["lo"]
-
- def qlist_foreach(self, head, field_str):
- var_p = head["lh_first"]
- while (var_p != 0):
- var = var_p.dereference()
- yield var
- var_p = var[field_str]["le_next"]
-
- def qemu_get_ram_block(self, ram_addr):
- ram_blocks = gdb.parse_and_eval("ram_list.blocks")
- for block in self.qlist_foreach(ram_blocks, "next"):
- if (ram_addr - block["offset"] < block["used_length"]):
- return block
- raise gdb.GdbError("Bad ram offset %x" % ram_addr)
-
- def qemu_get_ram_ptr(self, ram_addr):
- block = self.qemu_get_ram_block(ram_addr)
- return block["host"] + (ram_addr - block["offset"])
-
- def memory_region_get_ram_ptr(self, mr):
- if (mr["alias"] != 0):
- return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) +
- mr["alias_offset"])
- return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK)
-
- def guest_phys_blocks_init(self):
- self.guest_phys_blocks = []
-
- def guest_phys_blocks_append(self):
- print "guest RAM blocks:"
- print ("target_start target_end host_addr message "
- "count")
- print ("---------------- ---------------- ---------------- ------- "
- "-----")
-
- current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
- current_map = current_map_p.dereference()
- for cur in range(current_map["nr"]):
- flat_range = (current_map["ranges"] + cur).dereference()
- mr = flat_range["mr"].dereference()
-
- # we only care about RAM
- if (not mr["ram"]):
- continue
-
- section_size = self.int128_get64(flat_range["addr"]["size"])
- target_start = self.int128_get64(flat_range["addr"]["start"])
- target_end = target_start + section_size
- host_addr = (self.memory_region_get_ram_ptr(mr) +
- flat_range["offset_in_region"])
- predecessor = None
-
- # find continuity in guest physical address space
- if (len(self.guest_phys_blocks) > 0):
- predecessor = self.guest_phys_blocks[-1]
- predecessor_size = (predecessor["target_end"] -
- predecessor["target_start"])
-
- # the memory API guarantees monotonically increasing
- # traversal
- assert (predecessor["target_end"] <= target_start)
-
- # we want continuity in both guest-physical and
- # host-virtual memory
- if (predecessor["target_end"] < target_start or
- predecessor["host_addr"] + predecessor_size != host_addr):
- predecessor = None
-
- if (predecessor is None):
- # isolated mapping, add it to the list
- self.guest_phys_blocks.append({"target_start": target_start,
- "target_end" : target_end,
- "host_addr" : host_addr})
- message = "added"
- else:
- # expand predecessor until @target_end; predecessor's
- # start doesn't change
- predecessor["target_end"] = target_end
- message = "joined"
-
- print ("%016x %016x %016x %-7s %5u" %
- (target_start, target_end, host_addr.cast(self.uintptr_t),
- message, len(self.guest_phys_blocks)))
-
- def cpu_get_dump_info(self):
- # We can't synchronize the registers with KVM post-mortem, and
- # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
- # may not reflect long mode for example. Hence just assume the
- # most common values. This also means that instruction pointer
- # etc. will be bogus in the dump, but at least the RAM contents
- # should be valid.
- self.dump_info = {"d_machine": self.EM_X86_64,
- "d_endian" : self.ELFDATA2LSB,
- "d_class" : self.ELFCLASS64}
-
- def encode_elf64_ehdr_le(self):
- return self.elf64_ehdr_le.pack(
- self.ELFMAG, # e_ident/magic
- self.dump_info["d_class"], # e_ident/class
- self.dump_info["d_endian"], # e_ident/data
- self.EV_CURRENT, # e_ident/version
- 0, # e_ident/osabi
- "", # e_ident/pad
- self.ET_CORE, # e_type
- self.dump_info["d_machine"], # e_machine
- self.EV_CURRENT, # e_version
- 0, # e_entry
- self.elf64_ehdr_le.size, # e_phoff
- 0, # e_shoff
- 0, # e_flags
- self.elf64_ehdr_le.size, # e_ehsize
- self.elf64_phdr_le.size, # e_phentsize
- self.phdr_num, # e_phnum
- 0, # e_shentsize
- 0, # e_shnum
- 0 # e_shstrndx
- )
-
- def encode_elf64_note_le(self):
- return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type
- 0, # p_flags
- (self.memory_offset -
- len(self.note)), # p_offset
- 0, # p_vaddr
- 0, # p_paddr
- len(self.note), # p_filesz
- len(self.note), # p_memsz
- 0 # p_align
- )
-
- def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
- return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type
- 0, # p_flags
- offset, # p_offset
- 0, # p_vaddr
- start_hwaddr, # p_paddr
- range_size, # p_filesz
- range_size, # p_memsz
- 0 # p_align
- )
-
- def note_init(self, name, desc, type):
- # name must include a trailing NUL
- namesz = (len(name) + 1 + 3) / 4 * 4
- descsz = (len(desc) + 3) / 4 * 4
- fmt = ("<" # little endian
- "I" # n_namesz
- "I" # n_descsz
- "I" # n_type
- "%us" # name
- "%us" # desc
- % (namesz, descsz))
- self.note = struct.pack(fmt,
- len(name) + 1, len(desc), type, name, desc)
-
- def dump_init(self):
- self.guest_phys_blocks_init()
- self.guest_phys_blocks_append()
- self.cpu_get_dump_info()
- # we have no way to retrieve the VCPU status from KVM
- # post-mortem
- self.note_init("NONE", "EMPTY", 0)
-
- # Account for PT_NOTE.
- self.phdr_num = 1
-
- # We should never reach PN_XNUM for paging=false dumps: there's
- # just a handful of discontiguous ranges after merging.
- self.phdr_num += len(self.guest_phys_blocks)
- assert (self.phdr_num < self.PN_XNUM)
-
- # Calculate the ELF file offset where the memory dump commences:
- #
- # ELF header
- # PT_NOTE
- # PT_LOAD: 1
- # PT_LOAD: 2
- # ...
- # PT_LOAD: len(self.guest_phys_blocks)
- # ELF note
- # memory dump
- self.memory_offset = (self.elf64_ehdr_le.size +
- self.elf64_phdr_le.size * self.phdr_num +
- len(self.note))
-
- def dump_begin(self, vmcore):
- vmcore.write(self.encode_elf64_ehdr_le())
- vmcore.write(self.encode_elf64_note_le())
- running = self.memory_offset
+ self.elf = None
+ self.guest_phys_blocks = None
+
+ def dump_init(self, vmcore):
+ """Prepares and writes ELF structures to core file."""
+
+ # Needed to make crash happy, data for more useful notes is
+ # not available in a qemu core.
+ self.elf.add_note("NONE", "EMPTY", 0)
+
+ # We should never reach PN_XNUM for paging=false dumps,
+ # there's just a handful of discontiguous ranges after
+ # merging.
+ # The constant is needed to account for the PT_NOTE segment.
+ phdr_num = len(self.guest_phys_blocks) + 1
+ assert phdr_num < PN_XNUM
+
for block in self.guest_phys_blocks:
- range_size = block["target_end"] - block["target_start"]
- vmcore.write(self.encode_elf64_load_le(running,
- block["target_start"],
- range_size))
- running += range_size
- vmcore.write(self.note)
+ block_size = block["target_end"] - block["target_start"]
+ self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
+
+ self.elf.to_file(vmcore)
def dump_iterate(self, vmcore):
+ """Writes guest core to file."""
+
qemu_core = gdb.inferiors()[0]
for block in self.guest_phys_blocks:
- cur = block["host_addr"]
+ cur = block["host_addr"]
left = block["target_end"] - block["target_start"]
- print ("dumping range at %016x for length %016x" %
- (cur.cast(self.uintptr_t), left))
- while (left > 0):
- chunk_size = min(self.TARGET_PAGE_SIZE, left)
+ print("dumping range at %016x for length %016x" %
+ (cur.cast(UINTPTR_T), left))
+
+ while left > 0:
+ chunk_size = min(TARGET_PAGE_SIZE, left)
chunk = qemu_core.read_memory(cur, chunk_size)
vmcore.write(chunk)
- cur += chunk_size
+ cur += chunk_size
left -= chunk_size
- def create_vmcore(self, filename):
- vmcore = open(filename, "wb")
- self.dump_begin(vmcore)
- self.dump_iterate(vmcore)
- vmcore.close()
-
def invoke(self, args, from_tty):
+ """Handles command invocation from gdb."""
+
# Unwittingly pressing the Enter key after the command should
# not dump the same multi-gig coredump to the same file.
self.dont_repeat()
argv = gdb.string_to_argv(args)
- if (len(argv) != 1):
- raise gdb.GdbError("usage: dump-guest-memory FILE")
+ if len(argv) != 2:
+ raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
+
+ self.elf = ELF(argv[1])
+ self.guest_phys_blocks = get_guest_phys_blocks()
- self.dump_init()
- self.create_vmcore(argv[0])
+ with open(argv[0], "wb") as vmcore:
+ self.dump_init(vmcore)
+ self.dump_iterate(vmcore)
DumpGuestMemory()
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index 7e5d25612b..d43e8f3e85 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -12,285 +12,311 @@
# the COPYING file in the top-level directory.
import curses
-import sys, os, time, optparse, ctypes
-from ctypes import *
-
-class DebugfsProvider(object):
- def __init__(self):
- self.base = '/sys/kernel/debug/kvm'
- self._fields = os.listdir(self.base)
- def fields(self):
- return self._fields
- def select(self, fields):
- self._fields = fields
- def read(self):
- def val(key):
- return int(file(self.base + '/' + key).read())
- return dict([(key, val(key)) for key in self._fields])
-
-vmx_exit_reasons = {
- 0: 'EXCEPTION_NMI',
- 1: 'EXTERNAL_INTERRUPT',
- 2: 'TRIPLE_FAULT',
- 7: 'PENDING_INTERRUPT',
- 8: 'NMI_WINDOW',
- 9: 'TASK_SWITCH',
- 10: 'CPUID',
- 12: 'HLT',
- 14: 'INVLPG',
- 15: 'RDPMC',
- 16: 'RDTSC',
- 18: 'VMCALL',
- 19: 'VMCLEAR',
- 20: 'VMLAUNCH',
- 21: 'VMPTRLD',
- 22: 'VMPTRST',
- 23: 'VMREAD',
- 24: 'VMRESUME',
- 25: 'VMWRITE',
- 26: 'VMOFF',
- 27: 'VMON',
- 28: 'CR_ACCESS',
- 29: 'DR_ACCESS',
- 30: 'IO_INSTRUCTION',
- 31: 'MSR_READ',
- 32: 'MSR_WRITE',
- 33: 'INVALID_STATE',
- 36: 'MWAIT_INSTRUCTION',
- 39: 'MONITOR_INSTRUCTION',
- 40: 'PAUSE_INSTRUCTION',
- 41: 'MCE_DURING_VMENTRY',
- 43: 'TPR_BELOW_THRESHOLD',
- 44: 'APIC_ACCESS',
- 48: 'EPT_VIOLATION',
- 49: 'EPT_MISCONFIG',
- 54: 'WBINVD',
- 55: 'XSETBV',
- 56: 'APIC_WRITE',
- 58: 'INVPCID',
+import sys
+import os
+import time
+import optparse
+import ctypes
+import fcntl
+import resource
+import struct
+import re
+from collections import defaultdict
+
+VMX_EXIT_REASONS = {
+ 'EXCEPTION_NMI': 0,
+ 'EXTERNAL_INTERRUPT': 1,
+ 'TRIPLE_FAULT': 2,
+ 'PENDING_INTERRUPT': 7,
+ 'NMI_WINDOW': 8,
+ 'TASK_SWITCH': 9,
+ 'CPUID': 10,
+ 'HLT': 12,
+ 'INVLPG': 14,
+ 'RDPMC': 15,
+ 'RDTSC': 16,
+ 'VMCALL': 18,
+ 'VMCLEAR': 19,
+ 'VMLAUNCH': 20,
+ 'VMPTRLD': 21,
+ 'VMPTRST': 22,
+ 'VMREAD': 23,
+ 'VMRESUME': 24,
+ 'VMWRITE': 25,
+ 'VMOFF': 26,
+ 'VMON': 27,
+ 'CR_ACCESS': 28,
+ 'DR_ACCESS': 29,
+ 'IO_INSTRUCTION': 30,
+ 'MSR_READ': 31,
+ 'MSR_WRITE': 32,
+ 'INVALID_STATE': 33,
+ 'MWAIT_INSTRUCTION': 36,
+ 'MONITOR_INSTRUCTION': 39,
+ 'PAUSE_INSTRUCTION': 40,
+ 'MCE_DURING_VMENTRY': 41,
+ 'TPR_BELOW_THRESHOLD': 43,
+ 'APIC_ACCESS': 44,
+ 'EPT_VIOLATION': 48,
+ 'EPT_MISCONFIG': 49,
+ 'WBINVD': 54,
+ 'XSETBV': 55,
+ 'APIC_WRITE': 56,
+ 'INVPCID': 58,
}
-svm_exit_reasons = {
- 0x000: 'READ_CR0',
- 0x003: 'READ_CR3',
- 0x004: 'READ_CR4',
- 0x008: 'READ_CR8',
- 0x010: 'WRITE_CR0',
- 0x013: 'WRITE_CR3',
- 0x014: 'WRITE_CR4',
- 0x018: 'WRITE_CR8',
- 0x020: 'READ_DR0',
- 0x021: 'READ_DR1',
- 0x022: 'READ_DR2',
- 0x023: 'READ_DR3',
- 0x024: 'READ_DR4',
- 0x025: 'READ_DR5',
- 0x026: 'READ_DR6',
- 0x027: 'READ_DR7',
- 0x030: 'WRITE_DR0',
- 0x031: 'WRITE_DR1',
- 0x032: 'WRITE_DR2',
- 0x033: 'WRITE_DR3',
- 0x034: 'WRITE_DR4',
- 0x035: 'WRITE_DR5',
- 0x036: 'WRITE_DR6',
- 0x037: 'WRITE_DR7',
- 0x040: 'EXCP_BASE',
- 0x060: 'INTR',
- 0x061: 'NMI',
- 0x062: 'SMI',
- 0x063: 'INIT',
- 0x064: 'VINTR',
- 0x065: 'CR0_SEL_WRITE',
- 0x066: 'IDTR_READ',
- 0x067: 'GDTR_READ',
- 0x068: 'LDTR_READ',
- 0x069: 'TR_READ',
- 0x06a: 'IDTR_WRITE',
- 0x06b: 'GDTR_WRITE',
- 0x06c: 'LDTR_WRITE',
- 0x06d: 'TR_WRITE',
- 0x06e: 'RDTSC',
- 0x06f: 'RDPMC',
- 0x070: 'PUSHF',
- 0x071: 'POPF',
- 0x072: 'CPUID',
- 0x073: 'RSM',
- 0x074: 'IRET',
- 0x075: 'SWINT',
- 0x076: 'INVD',
- 0x077: 'PAUSE',
- 0x078: 'HLT',
- 0x079: 'INVLPG',
- 0x07a: 'INVLPGA',
- 0x07b: 'IOIO',
- 0x07c: 'MSR',
- 0x07d: 'TASK_SWITCH',
- 0x07e: 'FERR_FREEZE',
- 0x07f: 'SHUTDOWN',
- 0x080: 'VMRUN',
- 0x081: 'VMMCALL',
- 0x082: 'VMLOAD',
- 0x083: 'VMSAVE',
- 0x084: 'STGI',
- 0x085: 'CLGI',
- 0x086: 'SKINIT',
- 0x087: 'RDTSCP',
- 0x088: 'ICEBP',
- 0x089: 'WBINVD',
- 0x08a: 'MONITOR',
- 0x08b: 'MWAIT',
- 0x08c: 'MWAIT_COND',
- 0x08d: 'XSETBV',
- 0x400: 'NPF',
+SVM_EXIT_REASONS = {
+ 'READ_CR0': 0x000,
+ 'READ_CR3': 0x003,
+ 'READ_CR4': 0x004,
+ 'READ_CR8': 0x008,
+ 'WRITE_CR0': 0x010,
+ 'WRITE_CR3': 0x013,
+ 'WRITE_CR4': 0x014,
+ 'WRITE_CR8': 0x018,
+ 'READ_DR0': 0x020,
+ 'READ_DR1': 0x021,
+ 'READ_DR2': 0x022,
+ 'READ_DR3': 0x023,
+ 'READ_DR4': 0x024,
+ 'READ_DR5': 0x025,
+ 'READ_DR6': 0x026,
+ 'READ_DR7': 0x027,
+ 'WRITE_DR0': 0x030,
+ 'WRITE_DR1': 0x031,
+ 'WRITE_DR2': 0x032,
+ 'WRITE_DR3': 0x033,
+ 'WRITE_DR4': 0x034,
+ 'WRITE_DR5': 0x035,
+ 'WRITE_DR6': 0x036,
+ 'WRITE_DR7': 0x037,
+ 'EXCP_BASE': 0x040,
+ 'INTR': 0x060,
+ 'NMI': 0x061,
+ 'SMI': 0x062,
+ 'INIT': 0x063,
+ 'VINTR': 0x064,
+ 'CR0_SEL_WRITE': 0x065,
+ 'IDTR_READ': 0x066,
+ 'GDTR_READ': 0x067,
+ 'LDTR_READ': 0x068,
+ 'TR_READ': 0x069,
+ 'IDTR_WRITE': 0x06a,
+ 'GDTR_WRITE': 0x06b,
+ 'LDTR_WRITE': 0x06c,
+ 'TR_WRITE': 0x06d,
+ 'RDTSC': 0x06e,
+ 'RDPMC': 0x06f,
+ 'PUSHF': 0x070,
+ 'POPF': 0x071,
+ 'CPUID': 0x072,
+ 'RSM': 0x073,
+ 'IRET': 0x074,
+ 'SWINT': 0x075,
+ 'INVD': 0x076,
+ 'PAUSE': 0x077,
+ 'HLT': 0x078,
+ 'INVLPG': 0x079,
+ 'INVLPGA': 0x07a,
+ 'IOIO': 0x07b,
+ 'MSR': 0x07c,
+ 'TASK_SWITCH': 0x07d,
+ 'FERR_FREEZE': 0x07e,
+ 'SHUTDOWN': 0x07f,
+ 'VMRUN': 0x080,
+ 'VMMCALL': 0x081,
+ 'VMLOAD': 0x082,
+ 'VMSAVE': 0x083,
+ 'STGI': 0x084,
+ 'CLGI': 0x085,
+ 'SKINIT': 0x086,
+ 'RDTSCP': 0x087,
+ 'ICEBP': 0x088,
+ 'WBINVD': 0x089,
+ 'MONITOR': 0x08a,
+ 'MWAIT': 0x08b,
+ 'MWAIT_COND': 0x08c,
+ 'XSETBV': 0x08d,
+ 'NPF': 0x400,
}
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
-aarch64_exit_reasons = {
- 0x00: 'UNKNOWN',
- 0x01: 'WFI',
- 0x03: 'CP15_32',
- 0x04: 'CP15_64',
- 0x05: 'CP14_MR',
- 0x06: 'CP14_LS',
- 0x07: 'FP_ASIMD',
- 0x08: 'CP10_ID',
- 0x0C: 'CP14_64',
- 0x0E: 'ILL_ISS',
- 0x11: 'SVC32',
- 0x12: 'HVC32',
- 0x13: 'SMC32',
- 0x15: 'SVC64',
- 0x16: 'HVC64',
- 0x17: 'SMC64',
- 0x18: 'SYS64',
- 0x20: 'IABT',
- 0x21: 'IABT_HYP',
- 0x22: 'PC_ALIGN',
- 0x24: 'DABT',
- 0x25: 'DABT_HYP',
- 0x26: 'SP_ALIGN',
- 0x28: 'FP_EXC32',
- 0x2C: 'FP_EXC64',
- 0x2F: 'SERROR',
- 0x30: 'BREAKPT',
- 0x31: 'BREAKPT_HYP',
- 0x32: 'SOFTSTP',
- 0x33: 'SOFTSTP_HYP',
- 0x34: 'WATCHPT',
- 0x35: 'WATCHPT_HYP',
- 0x38: 'BKPT32',
- 0x3A: 'VECTOR32',
- 0x3C: 'BRK64',
+AARCH64_EXIT_REASONS = {
+ 'UNKNOWN': 0x00,
+ 'WFI': 0x01,
+ 'CP15_32': 0x03,
+ 'CP15_64': 0x04,
+ 'CP14_MR': 0x05,
+ 'CP14_LS': 0x06,
+ 'FP_ASIMD': 0x07,
+ 'CP10_ID': 0x08,
+ 'CP14_64': 0x0C,
+ 'ILL_ISS': 0x0E,
+ 'SVC32': 0x11,
+ 'HVC32': 0x12,
+ 'SMC32': 0x13,
+ 'SVC64': 0x15,
+ 'HVC64': 0x16,
+ 'SMC64': 0x17,
+ 'SYS64': 0x18,
+ 'IABT': 0x20,
+ 'IABT_HYP': 0x21,
+ 'PC_ALIGN': 0x22,
+ 'DABT': 0x24,
+ 'DABT_HYP': 0x25,
+ 'SP_ALIGN': 0x26,
+ 'FP_EXC32': 0x28,
+ 'FP_EXC64': 0x2C,
+ 'SERROR': 0x2F,
+ 'BREAKPT': 0x30,
+ 'BREAKPT_HYP': 0x31,
+ 'SOFTSTP': 0x32,
+ 'SOFTSTP_HYP': 0x33,
+ 'WATCHPT': 0x34,
+ 'WATCHPT_HYP': 0x35,
+ 'BKPT32': 0x38,
+ 'VECTOR32': 0x3A,
+ 'BRK64': 0x3C,
}
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
-userspace_exit_reasons = {
- 0: 'UNKNOWN',
- 1: 'EXCEPTION',
- 2: 'IO',
- 3: 'HYPERCALL',
- 4: 'DEBUG',
- 5: 'HLT',
- 6: 'MMIO',
- 7: 'IRQ_WINDOW_OPEN',
- 8: 'SHUTDOWN',
- 9: 'FAIL_ENTRY',
- 10: 'INTR',
- 11: 'SET_TPR',
- 12: 'TPR_ACCESS',
- 13: 'S390_SIEIC',
- 14: 'S390_RESET',
- 15: 'DCR',
- 16: 'NMI',
- 17: 'INTERNAL_ERROR',
- 18: 'OSI',
- 19: 'PAPR_HCALL',
- 20: 'S390_UCONTROL',
- 21: 'WATCHDOG',
- 22: 'S390_TSCH',
- 23: 'EPR',
- 24: 'SYSTEM_EVENT',
+USERSPACE_EXIT_REASONS = {
+ 'UNKNOWN': 0,
+ 'EXCEPTION': 1,
+ 'IO': 2,
+ 'HYPERCALL': 3,
+ 'DEBUG': 4,
+ 'HLT': 5,
+ 'MMIO': 6,
+ 'IRQ_WINDOW_OPEN': 7,
+ 'SHUTDOWN': 8,
+ 'FAIL_ENTRY': 9,
+ 'INTR': 10,
+ 'SET_TPR': 11,
+ 'TPR_ACCESS': 12,
+ 'S390_SIEIC': 13,
+ 'S390_RESET': 14,
+ 'DCR': 15,
+ 'NMI': 16,
+ 'INTERNAL_ERROR': 17,
+ 'OSI': 18,
+ 'PAPR_HCALL': 19,
+ 'S390_UCONTROL': 20,
+ 'WATCHDOG': 21,
+ 'S390_TSCH': 22,
+ 'EPR': 23,
+ 'SYSTEM_EVENT': 24,
}
-x86_exit_reasons = {
- 'vmx': vmx_exit_reasons,
- 'svm': svm_exit_reasons,
+IOCTL_NUMBERS = {
+ 'SET_FILTER': 0x40082406,
+ 'ENABLE': 0x00002400,
+ 'DISABLE': 0x00002401,
+ 'RESET': 0x00002403,
}
-sc_perf_evt_open = None
-exit_reasons = None
+class Arch(object):
+ """Class that encapsulates global architecture-specific data like
+ syscall and ioctl numbers.
+
+ """
+ @staticmethod
+ def get_arch():
+ machine = os.uname()[4]
+
+ if machine.startswith('ppc'):
+ return ArchPPC()
+ elif machine.startswith('aarch64'):
+ return ArchA64()
+ elif machine.startswith('s390'):
+ return ArchS390()
+ else:
+ # X86_64
+ for line in open('/proc/cpuinfo'):
+ if not line.startswith('flags'):
+ continue
+
+ flags = line.split()
+ if 'vmx' in flags:
+ return ArchX86(VMX_EXIT_REASONS)
+ if 'svm' in flags:
+ return ArchX86(SVM_EXIT_REASONS)
+ return
+
+class ArchX86(Arch):
+ def __init__(self, exit_reasons):
+ self.sc_perf_evt_open = 298
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = exit_reasons
+
+class ArchPPC(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 319
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.ioctl_numbers['ENABLE'] = 0x20002400
+ self.ioctl_numbers['DISABLE'] = 0x20002401
-ioctl_numbers = {
- 'SET_FILTER' : 0x40082406,
- 'ENABLE' : 0x00002400,
- 'DISABLE' : 0x00002401,
- 'RESET' : 0x00002403,
-}
+ # PPC comes in 32 and 64 bit and some generated ioctl
+ # numbers depend on the wordsize.
+ char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+ self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
+
+class ArchA64(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 241
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = AARCH64_EXIT_REASONS
+
+class ArchS390(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 331
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = None
+
+ARCH = Arch.get_arch()
+
+
+def walkdir(path):
+ """Returns os.walk() data for specified directory.
+
+ As it is only a wrapper it returns the same 3-tuple of (dirpath,
+ dirnames, filenames).
+ """
+ return next(os.walk(path))
+
+
+def parse_int_list(list_string):
+ """Returns an int list from a string of comma-separated integers and
+ integer ranges."""
+ integers = []
+ members = list_string.split(',')
-def x86_init(flag):
- globals().update({
- 'sc_perf_evt_open' : 298,
- 'exit_reasons' : x86_exit_reasons[flag],
- })
-
-def s390_init():
- globals().update({
- 'sc_perf_evt_open' : 331
- })
-
-def ppc_init():
- globals().update({
- 'sc_perf_evt_open' : 319,
- 'ioctl_numbers' : {
- 'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
- 'ENABLE' : 0x20002400,
- 'DISABLE' : 0x20002401,
- }
- })
-
-def aarch64_init():
- globals().update({
- 'sc_perf_evt_open' : 241,
- 'exit_reasons' : aarch64_exit_reasons,
- })
-
-def detect_platform():
- if os.uname()[4].startswith('ppc'):
- ppc_init()
- return
- elif os.uname()[4].startswith('aarch64'):
- aarch64_init()
- return
-
- for line in file('/proc/cpuinfo').readlines():
- if line.startswith('flags'):
- for flag in line.split():
- if flag in x86_exit_reasons:
- x86_init(flag)
- return
- elif line.startswith('vendor_id'):
- for flag in line.split():
- if flag == 'IBM/S390':
- s390_init()
- return
-
-detect_platform()
-
-def invert(d):
- return dict((x[1], x[0]) for x in d.iteritems())
-
-filters = {}
-filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons))
-if exit_reasons:
- filters['kvm_exit'] = ('exit_reason', invert(exit_reasons))
-
-import struct, array
-
-libc = ctypes.CDLL('libc.so.6')
+ for member in members:
+ if '-' not in member:
+ integers.append(int(member))
+ else:
+ int_range = member.split('-')
+ integers.extend(range(int(int_range[0]),
+ int(int_range[1]) + 1))
+
+ return integers
+
+
+def get_online_cpus():
+ with open('/sys/devices/system/cpu/online') as cpu_list:
+ cpu_string = cpu_list.readline()
+ return parse_int_list(cpu_string)
+
+
+def get_filters():
+ filters = {}
+ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+ if ARCH.exit_reasons:
+ filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
+ return filters
+
+libc = ctypes.CDLL('libc.so.6', use_errno=True)
syscall = libc.syscall
-get_errno = libc.__errno_location
-get_errno.restype = POINTER(c_int)
class perf_event_attr(ctypes.Structure):
_fields_ = [('type', ctypes.c_uint32),
@@ -305,262 +331,350 @@ class perf_event_attr(ctypes.Structure):
('bp_addr', ctypes.c_uint64),
('bp_len', ctypes.c_uint64),
]
-def _perf_event_open(attr, pid, cpu, group_fd, flags):
- return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
- ctypes.c_int(cpu), ctypes.c_int(group_fd),
- ctypes.c_long(flags))
-
-PERF_TYPE_HARDWARE = 0
-PERF_TYPE_SOFTWARE = 1
-PERF_TYPE_TRACEPOINT = 2
-PERF_TYPE_HW_CACHE = 3
-PERF_TYPE_RAW = 4
-PERF_TYPE_BREAKPOINT = 5
-
-PERF_SAMPLE_IP = 1 << 0
-PERF_SAMPLE_TID = 1 << 1
-PERF_SAMPLE_TIME = 1 << 2
-PERF_SAMPLE_ADDR = 1 << 3
-PERF_SAMPLE_READ = 1 << 4
-PERF_SAMPLE_CALLCHAIN = 1 << 5
-PERF_SAMPLE_ID = 1 << 6
-PERF_SAMPLE_CPU = 1 << 7
-PERF_SAMPLE_PERIOD = 1 << 8
-PERF_SAMPLE_STREAM_ID = 1 << 9
-PERF_SAMPLE_RAW = 1 << 10
-
-PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
-PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
-PERF_FORMAT_ID = 1 << 2
-PERF_FORMAT_GROUP = 1 << 3
-import re
+ def __init__(self):
+ super(self.__class__, self).__init__()
+ self.type = PERF_TYPE_TRACEPOINT
+ self.size = ctypes.sizeof(self)
+ self.read_format = PERF_FORMAT_GROUP
+
+def perf_event_open(attr, pid, cpu, group_fd, flags):
+ return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
+ ctypes.c_int(pid), ctypes.c_int(cpu),
+ ctypes.c_int(group_fd), ctypes.c_long(flags))
-sys_tracing = '/sys/kernel/debug/tracing'
+PERF_TYPE_TRACEPOINT = 2
+PERF_FORMAT_GROUP = 1 << 3
+
+PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
+PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
class Group(object):
- def __init__(self, cpu):
+ def __init__(self):
self.events = []
- self.group_leader = None
- self.cpu = cpu
- def add_event(self, name, event_set, tracepoint, filter = None):
- self.events.append(Event(group = self,
- name = name, event_set = event_set,
- tracepoint = tracepoint, filter = filter))
- if len(self.events) == 1:
- self.file = os.fdopen(self.events[0].fd)
+
+ def add_event(self, event):
+ self.events.append(event)
+
def read(self):
- bytes = 8 * (1 + len(self.events))
- fmt = 'xxxxxxxx' + 'q' * len(self.events)
+ length = 8 * (1 + len(self.events))
+ read_format = 'xxxxxxxx' + 'Q' * len(self.events)
return dict(zip([event.name for event in self.events],
- struct.unpack(fmt, self.file.read(bytes))))
+ struct.unpack(read_format,
+ os.read(self.events[0].fd, length))))
class Event(object):
- def __init__(self, group, name, event_set, tracepoint, filter = None):
+ def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
+ trace_set='kvm'):
self.name = name
- attr = perf_event_attr()
- attr.type = PERF_TYPE_TRACEPOINT
- attr.size = ctypes.sizeof(attr)
- id_path = os.path.join(sys_tracing, 'events', event_set,
- tracepoint, 'id')
- id = int(file(id_path).read())
- attr.config = id
- attr.sample_type = (PERF_SAMPLE_RAW
- | PERF_SAMPLE_TIME
- | PERF_SAMPLE_CPU)
- attr.sample_period = 1
- attr.read_format = PERF_FORMAT_GROUP
+ self.fd = None
+ self.setup_event(group, trace_cpu, trace_point, trace_filter,
+ trace_set)
+
+ def setup_event_attribute(self, trace_set, trace_point):
+ id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
+ trace_point, 'id')
+
+ event_attr = perf_event_attr()
+ event_attr.config = int(open(id_path).read())
+ return event_attr
+
+ def setup_event(self, group, trace_cpu, trace_point, trace_filter,
+ trace_set):
+ event_attr = self.setup_event_attribute(trace_set, trace_point)
+
group_leader = -1
if group.events:
group_leader = group.events[0].fd
- fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
+
+ fd = perf_event_open(event_attr, -1, trace_cpu,
+ group_leader, 0)
if fd == -1:
- err = get_errno()[0]
- raise Exception('perf_event_open failed, errno = ' + err.__str__())
- if filter:
- import fcntl
- fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
+ err = ctypes.get_errno()
+ raise OSError(err, os.strerror(err),
+ 'while calling sys_perf_event_open().')
+
+ if trace_filter:
+ fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
+ trace_filter)
+
self.fd = fd
+
def enable(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
+
def disable(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
+
def reset(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
class TracepointProvider(object):
def __init__(self):
- path = os.path.join(sys_tracing, 'events', 'kvm')
- fields = [f
- for f in os.listdir(path)
- if os.path.isdir(os.path.join(path, f))]
+ self.group_leaders = []
+ self.filters = get_filters()
+ self._fields = self.get_available_fields()
+ self.setup_traces()
+ self.fields = self._fields
+
+ def get_available_fields(self):
+ path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
+ fields = walkdir(path)[1]
extra = []
- for f in fields:
- if f in filters:
- subfield, values = filters[f]
- for name, number in values.iteritems():
- extra.append(f + '(' + name + ')')
+ for field in fields:
+ if field in self.filters:
+ filter_name_, filter_dicts = self.filters[field]
+ for name in filter_dicts:
+ extra.append(field + '(' + name + ')')
fields += extra
- self._setup(fields)
- self.select(fields)
- def fields(self):
- return self._fields
+ return fields
+
+ def setup_traces(self):
+ cpus = get_online_cpus()
+
+ # The constant is needed as a buffer for python libs, std
+ # streams and other files that the script opens.
+ newlim = len(cpus) * len(self._fields) + 50
+ try:
+ softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+ if hardlim < newlim:
+ # Raising the hard limit requires CAP_SYS_RESOURCE.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
+ else:
+ # Raising the soft limit is sufficient.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
+
+ except ValueError:
+ sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
- def _online_cpus(self):
- l = []
- pattern = r'cpu([0-9]+)'
- basedir = '/sys/devices/system/cpu'
- for entry in os.listdir(basedir):
- match = re.match(pattern, entry)
- if not match:
- continue
- path = os.path.join(basedir, entry, 'online')
- if os.path.exists(path) and open(path).read().strip() != '1':
- continue
- l.append(int(match.group(1)))
- return l
-
- def _setup(self, _fields):
- self._fields = _fields
- cpus = self._online_cpus()
- import resource
- nfiles = len(cpus) * 1000
- resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
- events = []
- self.group_leaders = []
for cpu in cpus:
- group = Group(cpu)
- for name in _fields:
+ group = Group()
+ for name in self._fields:
tracepoint = name
- filter = None
- m = re.match(r'(.*)\((.*)\)', name)
- if m:
- tracepoint, sub = m.groups()
- filter = '%s==%d\0' % (filters[tracepoint][0],
- filters[tracepoint][1][sub])
- event = group.add_event(name, event_set = 'kvm',
- tracepoint = tracepoint,
- filter = filter)
+ tracefilter = None
+ match = re.match(r'(.*)\((.*)\)', name)
+ if match:
+ tracepoint, sub = match.groups()
+ tracefilter = ('%s==%d\0' %
+ (self.filters[tracepoint][0],
+ self.filters[tracepoint][1][sub]))
+
+ group.add_event(Event(name=name,
+ group=group,
+ trace_cpu=cpu,
+ trace_point=tracepoint,
+ trace_filter=tracefilter))
self.group_leaders.append(group)
- def select(self, fields):
+
+ def available_fields(self):
+ return self.get_available_fields()
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ self._fields = fields
for group in self.group_leaders:
- for event in group.events:
+ for index, event in enumerate(group.events):
if event.name in fields:
event.reset()
event.enable()
else:
- event.disable()
+ # Do not disable the group leader.
+ # It would disable all of its events.
+ if index != 0:
+ event.disable()
+
def read(self):
- from collections import defaultdict
ret = defaultdict(int)
for group in self.group_leaders:
for name, val in group.read().iteritems():
- ret[name] += val
+ if name in self._fields:
+ ret[name] += val
return ret
-class Stats:
- def __init__(self, providers, fields = None):
+class DebugfsProvider(object):
+ def __init__(self):
+ self._fields = self.get_available_fields()
+
+ def get_available_fields(self):
+ return walkdir(PATH_DEBUGFS_KVM)[2]
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ self._fields = fields
+
+ def read(self):
+ def val(key):
+ return int(file(PATH_DEBUGFS_KVM + '/' + key).read())
+ return dict([(key, val(key)) for key in self._fields])
+
+class Stats(object):
+ def __init__(self, providers, fields=None):
self.providers = providers
- self.fields_filter = fields
- self._update()
- def _update(self):
+ self._fields_filter = fields
+ self.values = {}
+ self.update_provider_filters()
+
+ def update_provider_filters(self):
def wanted(key):
- import re
- if not self.fields_filter:
+ if not self._fields_filter:
return True
- return re.match(self.fields_filter, key) is not None
- self.values = dict()
- for d in providers:
- provider_fields = [key for key in d.fields() if wanted(key)]
- for key in provider_fields:
- self.values[key] = None
- d.select(provider_fields)
- def set_fields_filter(self, fields_filter):
- self.fields_filter = fields_filter
- self._update()
+ return re.match(self._fields_filter, key) is not None
+
+ # As we reset the counters when updating the fields we can
+ # also clear the cache of old values.
+ self.values = {}
+ for provider in self.providers:
+ provider_fields = [key for key in provider.get_available_fields()
+ if wanted(key)]
+ provider.fields = provider_fields
+
+ @property
+ def fields_filter(self):
+ return self._fields_filter
+
+ @fields_filter.setter
+ def fields_filter(self, fields_filter):
+ self._fields_filter = fields_filter
+ self.update_provider_filters()
+
def get(self):
- for d in providers:
- new = d.read()
- for key in d.fields():
+ for provider in self.providers:
+ new = provider.read()
+ for key in provider.fields:
oldval = self.values.get(key, (0, 0))
- newval = new[key]
+ newval = new.get(key, 0)
newdelta = None
if oldval is not None:
newdelta = newval - oldval[0]
self.values[key] = (newval, newdelta)
return self.values
-if not os.access('/sys/kernel/debug', os.F_OK):
- print 'Please enable CONFIG_DEBUG_FS in your kernel'
- sys.exit(1)
-if not os.access('/sys/kernel/debug/kvm', os.F_OK):
- print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
- print "and ensure the kvm modules are loaded"
- sys.exit(1)
-
-label_width = 40
-number_width = 10
-
-def tui(screen, stats):
- curses.use_default_colors()
- curses.noecho()
- drilldown = False
- fields_filter = stats.fields_filter
- def update_drilldown():
- if not fields_filter:
- if drilldown:
- stats.set_fields_filter(None)
- else:
- stats.set_fields_filter(r'^[^\(]*$')
- update_drilldown()
- def refresh(sleeptime):
- screen.erase()
- screen.addstr(0, 0, 'kvm statistics')
- screen.addstr(2, 1, 'Event')
- screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total')
- screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current')
+LABEL_WIDTH = 40
+NUMBER_WIDTH = 10
+
+class Tui(object):
+ def __init__(self, stats):
+ self.stats = stats
+ self.screen = None
+ self.drilldown = False
+ self.update_drilldown()
+
+ def __enter__(self):
+ """Initialises curses for later use. Based on curses.wrapper
+ implementation from the Python standard library."""
+ self.screen = curses.initscr()
+ curses.noecho()
+ curses.cbreak()
+
+ # The try/except works around a minor bit of
+ # over-conscientiousness in the curses module: the error
+ # return from C start_color() is ignorable.
+ try:
+ curses.start_color()
+ except:
+ pass
+
+ curses.use_default_colors()
+ return self
+
+ def __exit__(self, *exception):
+ """Resets the terminal to its normal state. Based on curses.wrapper
+ implementation from the Python standard library."""
+ if self.screen:
+ self.screen.keypad(0)
+ curses.echo()
+ curses.nocbreak()
+ curses.endwin()
+
+ def update_drilldown(self):
+ if not self.stats.fields_filter:
+ self.stats.fields_filter = r'^[^\(]*$'
+
+ elif self.stats.fields_filter == r'^[^\(]*$':
+ self.stats.fields_filter = None
+
+ def refresh(self, sleeptime):
+ self.screen.erase()
+ self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+ self.screen.addstr(2, 1, 'Event')
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
+ len('Total'), 'Total')
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
+ len('Current'), 'Current')
row = 3
- s = stats.get()
+ stats = self.stats.get()
def sortkey(x):
- if s[x][1]:
- return (-s[x][1], -s[x][0])
+ if stats[x][1]:
+ return (-stats[x][1], -stats[x][0])
else:
- return (0, -s[x][0])
- for key in sorted(s.keys(), key = sortkey):
- if row >= screen.getmaxyx()[0]:
+ return (0, -stats[x][0])
+ for key in sorted(stats.keys(), key=sortkey):
+
+ if row >= self.screen.getmaxyx()[0]:
break
- values = s[key]
+ values = stats[key]
if not values[0] and not values[1]:
break
col = 1
- screen.addstr(row, col, key)
- col += label_width
- screen.addstr(row, col, '%10d' % (values[0],))
- col += number_width
+ self.screen.addstr(row, col, key)
+ col += LABEL_WIDTH
+ self.screen.addstr(row, col, '%10d' % (values[0],))
+ col += NUMBER_WIDTH
if values[1] is not None:
- screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
+ self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
row += 1
- screen.refresh()
+ self.screen.refresh()
+
+ def show_filter_selection(self):
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0,
+ "Show statistics for events matching a regex.",
+ curses.A_BOLD)
+ self.screen.addstr(2, 0,
+ "Current regex: {0}"
+ .format(self.stats.fields_filter))
+ self.screen.addstr(3, 0, "New regex: ")
+ curses.echo()
+ regex = self.screen.getstr()
+ curses.noecho()
+ if len(regex) == 0:
+ return
+ try:
+ re.compile(regex)
+ self.stats.fields_filter = regex
+ return
+ except re.error:
+ continue
- sleeptime = 0.25
- while True:
- refresh(sleeptime)
- curses.halfdelay(int(sleeptime * 10))
- sleeptime = 3
- try:
- c = screen.getkey()
- if c == 'x':
- drilldown = not drilldown
- update_drilldown()
- if c == 'q':
+ def show_stats(self):
+ sleeptime = 0.25
+ while True:
+ self.refresh(sleeptime)
+ curses.halfdelay(int(sleeptime * 10))
+ sleeptime = 3
+ try:
+ char = self.screen.getkey()
+ if char == 'x':
+ self.drilldown = not self.drilldown
+ self.update_drilldown()
+ if char == 'q':
+ break
+ if char == 'f':
+ self.show_filter_selection()
+ except KeyboardInterrupt:
break
- except KeyboardInterrupt:
- break
- except curses.error:
- continue
+ except curses.error:
+ continue
def batch(stats):
s = stats.get()
@@ -568,13 +682,13 @@ def batch(stats):
s = stats.get()
for key in sorted(s.keys()):
values = s[key]
- print '%-22s%10d%10d' % (key, values[0], values[1])
+ print '%-42s%10d%10d' % (key, values[0], values[1])
def log(stats):
keys = sorted(stats.get().iterkeys())
def banner():
for k in keys:
- print '%10s' % k[0:9],
+ print '%s' % k,
print
def statline():
s = stats.get()
@@ -590,57 +704,110 @@ def log(stats):
statline()
line += 1
-options = optparse.OptionParser()
-options.add_option('-1', '--once', '--batch',
- action = 'store_true',
- default = False,
- dest = 'once',
- help = 'run in batch mode for one second',
- )
-options.add_option('-l', '--log',
- action = 'store_true',
- default = False,
- dest = 'log',
- help = 'run in logging mode (like vmstat)',
- )
-options.add_option('-t', '--tracepoints',
- action = 'store_true',
- default = False,
- dest = 'tracepoints',
- help = 'retrieve statistics from tracepoints',
- )
-options.add_option('-d', '--debugfs',
- action = 'store_true',
- default = False,
- dest = 'debugfs',
- help = 'retrieve statistics from debugfs',
- )
-options.add_option('-f', '--fields',
- action = 'store',
- default = None,
- dest = 'fields',
- help = 'fields to display (regex)',
- )
-(options, args) = options.parse_args(sys.argv)
-
-providers = []
-if options.tracepoints:
- providers.append(TracepointProvider())
-if options.debugfs:
- providers.append(DebugfsProvider())
-
-if len(providers) == 0:
- try:
- providers = [TracepointProvider()]
- except:
- providers = [DebugfsProvider()]
-
-stats = Stats(providers, fields = options.fields)
-
-if options.log:
- log(stats)
-elif not options.once:
- import curses.wrapper
- curses.wrapper(tui, stats)
-else:
- batch(stats)
+def get_options():
+ description_text = """
+This script displays various statistics about VMs running under KVM.
+The statistics are gathered from the KVM debugfs entries and / or the
+currently available perf traces.
+
+The monitoring takes additional cpu cycles and might affect the VM's
+performance.
+
+Requirements:
+- Access to:
+ /sys/kernel/debug/kvm
+ /sys/kernel/debug/tracing/events/*
+ /proc/pid/task
+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
+ CAP_SYS_ADMIN and perf events are used.
+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
+ the large number of files that are possibly opened.
+"""
+
+ class PlainHelpFormatter(optparse.IndentedHelpFormatter):
+ def format_description(self, description):
+ if description:
+ return description + "\n"
+ else:
+ return ""
+
+ optparser = optparse.OptionParser(description=description_text,
+ formatter=PlainHelpFormatter())
+ optparser.add_option('-1', '--once', '--batch',
+ action='store_true',
+ default=False,
+ dest='once',
+ help='run in batch mode for one second',
+ )
+ optparser.add_option('-l', '--log',
+ action='store_true',
+ default=False,
+ dest='log',
+ help='run in logging mode (like vmstat)',
+ )
+ optparser.add_option('-t', '--tracepoints',
+ action='store_true',
+ default=False,
+ dest='tracepoints',
+ help='retrieve statistics from tracepoints',
+ )
+ optparser.add_option('-d', '--debugfs',
+ action='store_true',
+ default=False,
+ dest='debugfs',
+ help='retrieve statistics from debugfs',
+ )
+ optparser.add_option('-f', '--fields',
+ action='store',
+ default=None,
+ dest='fields',
+ help='fields to display (regex)',
+ )
+ (options, _) = optparser.parse_args(sys.argv)
+ return options
+
+def get_providers(options):
+ providers = []
+
+ if options.tracepoints:
+ providers.append(TracepointProvider())
+ if options.debugfs:
+ providers.append(DebugfsProvider())
+ if len(providers) == 0:
+ providers.append(TracepointProvider())
+
+ return providers
+
+def check_access():
+ if not os.path.exists('/sys/kernel/debug'):
+ sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
+ sys.exit(1)
+
+ if not os.path.exists(PATH_DEBUGFS_KVM):
+ sys.stderr.write("Please make sure, that debugfs is mounted and "
+ "readable by the current user:\n"
+ "('mount -t debugfs debugfs /sys/kernel/debug')\n"
+ "Also ensure, that the kvm modules are loaded.\n")
+ sys.exit(1)
+
+ if not os.path.exists(PATH_DEBUGFS_TRACING):
+ sys.stderr.write("Please make {0} readable by the current user.\n"
+ .format(PATH_DEBUGFS_TRACING))
+ sys.exit(1)
+
+def main():
+ check_access()
+ options = get_options()
+ providers = get_providers(options)
+ stats = Stats(providers, fields=options.fields)
+
+ if options.log:
+ log(stats)
+ elif not options.once:
+ with Tui(stats) as tui:
+ tui.show_stats()
+ else:
+ batch(stats)
+
+if __name__ == "__main__":
+ main()