#!/usr/bin/python
#
# top-like utility for displaying kvm statistics
#
# Copyright 2006-2008 Qumranet Technologies
# Copyright 2008-2011 Red Hat, Inc.
#
# Authors:
# Avi Kivity <avi@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
import curses
import sys
import os
import time
import optparse
import ctypes
import fcntl
import resource
import struct
import re
from collections import defaultdict
class DebugfsProvider(object):
def __init__(self):
self.base = '/sys/kernel/debug/kvm'
self._fields = os.listdir(self.base)
def fields(self):
return self._fields
def select(self, fields):
self._fields = fields
def read(self):
def val(key):
return int(file(self.base + '/' + key).read())
return dict([(key, val(key)) for key in self._fields])
vmx_exit_reasons = {
0: 'EXCEPTION_NMI',
1: 'EXTERNAL_INTERRUPT',
2: 'TRIPLE_FAULT',
7: 'PENDING_INTERRUPT',
8: 'NMI_WINDOW',
9: 'TASK_SWITCH',
10: 'CPUID',
12: 'HLT',
14: 'INVLPG',
15: 'RDPMC',
16: 'RDTSC',
18: 'VMCALL',
19: 'VMCLEAR',
20: 'VMLAUNCH',
21: 'VMPTRLD',
22: 'VMPTRST',
23: 'VMREAD',
24: 'VMRESUME',
25: 'VMWRITE',
26: 'VMOFF',
27: 'VMON',
28: 'CR_ACCESS',
29: 'DR_ACCESS',
30: 'IO_INSTRUCTION',
31: 'MSR_READ',
32: 'MSR_WRITE',
33: 'INVALID_STATE',
36: 'MWAIT_INSTRUCTION',
39: 'MONITOR_INSTRUCTION',
40: 'PAUSE_INSTRUCTION',
41: 'MCE_DURING_VMENTRY',
43: 'TPR_BELOW_THRESHOLD',
44: 'APIC_ACCESS',
48: 'EPT_VIOLATION',
49: 'EPT_MISCONFIG',
54: 'WBINVD',
55: 'XSETBV',
56: 'APIC_WRITE',
58: 'INVPCID',
}
svm_exit_reasons = {
0x000: 'READ_CR0',
0x003: 'READ_CR3',
0x004: 'READ_CR4',
0x008: 'READ_CR8',
0x010: 'WRITE_CR0',
0x013: 'WRITE_CR3',
0x014: 'WRITE_CR4',
0x018: 'WRITE_CR8',
0x020: 'READ_DR0',
0x021: 'READ_DR1',
0x022: 'READ_DR2',
0x023: 'READ_DR3',
0x024: 'READ_DR4',
0x025: 'READ_DR5',
0x026: 'READ_DR6',
0x027: 'READ_DR7',
0x030: 'WRITE_DR0',
0x031: 'WRITE_DR1',
0x032: 'WRITE_DR2',
0x033: 'WRITE_DR3',
0x034: 'WRITE_DR4',
0x035: 'WRITE_DR5',
0x036: 'WRITE_DR6',
0x037: 'WRITE_DR7',
0x040: 'EXCP_BASE',
0x060: 'INTR',
0x061: 'NMI',
0x062: 'SMI',
0x063: 'INIT',
0x064: 'VINTR',
0x065: 'CR0_SEL_WRITE',
0x066: 'IDTR_READ',
0x067: 'GDTR_READ',
0x068: 'LDTR_READ',
0x069: 'TR_READ',
0x06a: 'IDTR_WRITE',
0x06b: 'GDTR_WRITE',
0x06c: 'LDTR_WRITE',
0x06d: 'TR_WRITE',
0x06e: 'RDTSC',
0x06f: 'RDPMC',
0x070: 'PUSHF',
0x071: 'POPF',
0x072: 'CPUID',
0x073: 'RSM',
0x074: 'IRET',
0x075: 'SWINT',
0x076: 'INVD',
0x077: 'PAUSE',
0x078: 'HLT',
0x079: 'INVLPG',
0x07a: 'INVLPGA',
0x07b: 'IOIO',
0x07c: 'MSR',
0x07d: 'TASK_SWITCH',
0x07e: 'FERR_FREEZE',
0x07f: 'SHUTDOWN',
0x080: 'VMRUN',
0x081: 'VMMCALL',
0x082: 'VMLOAD',
0x083: 'VMSAVE',
0x084: 'STGI',
0x085: 'CLGI',
0x086: 'SKINIT',
0x087: 'RDTSCP',
0x088: 'ICEBP',
0x089: 'WBINVD',
0x08a: 'MONITOR',
0x08b: 'MWAIT',
0x08c: 'MWAIT_COND',
0x08d: 'XSETBV',
0x400: 'NPF',
}
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
aarch64_exit_reasons = {
0x00: 'UNKNOWN',
0x01: 'WFI',
0x03: 'CP15_32',
0x04: 'CP15_64',
0x05: 'CP14_MR',
0x06: 'CP14_LS',
0x07: 'FP_ASIMD',
0x08: 'CP10_ID',
0x0C: 'CP14_64',
0x0E: 'ILL_ISS',
0x11: 'SVC32',
0x12: 'HVC32',
0x13: 'SMC32',
0x15: 'SVC64',
0x16: 'HVC64',
0x17: 'SMC64',
0x18: 'SYS64',
0x20: 'IABT',
0x21: 'IABT_HYP',
0x22: 'PC_ALIGN',
0x24: 'DABT',
0x25: 'DABT_HYP',
0x26: 'SP_ALIGN',
0x28: 'FP_EXC32',
0x2C: 'FP_EXC64',
0x2F: 'SERROR',
0x30: 'BREAKPT',
0x31: 'BREAKPT_HYP',
0x32: 'SOFTSTP',
0x33: 'SOFTSTP_HYP',
0x34: 'WATCHPT',
0x35: 'WATCHPT_HYP',
0x38: 'BKPT32',
0x3A: 'VECTOR32',
0x3C: 'BRK64',
}
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
userspace_exit_reasons = {
0: 'UNKNOWN',
1: 'EXCEPTION',
2: 'IO',
3: 'HYPERCALL',
4: 'DEBUG',
5: 'HLT',
6: 'MMIO',
7: 'IRQ_WINDOW_OPEN',
8: 'SHUTDOWN',
9: 'FAIL_ENTRY',
10: 'INTR',
11: 'SET_TPR',
12: 'TPR_ACCESS',
13: 'S390_SIEIC',
14: 'S390_RESET',
15: 'DCR',
16: 'NMI',
17: 'INTERNAL_ERROR',
18: 'OSI',
19: 'PAPR_HCALL',
20: 'S390_UCONTROL',
21: 'WATCHDOG',
22: 'S390_TSCH',
23: 'EPR',
24: 'SYSTEM_EVENT',
}
x86_exit_reasons = {
'vmx': vmx_exit_reasons,
'svm': svm_exit_reasons,
}
sc_perf_evt_open = None
exit_reasons = None
ioctl_numbers = {
'SET_FILTER' : 0x40082406,
'ENABLE' : 0x00002400,
'DISABLE' : 0x00002401,
'RESET' : 0x00002403,
}
def x86_init(flag):
globals().update({
'sc_perf_evt_open' : 298,
'exit_reasons' : x86_exit_reasons[flag],
})
def s390_init():
globals().update({
'sc_perf_evt_open' : 331
})
def ppc_init():
globals().update({
'sc_perf_evt_open' : 319,
'ioctl_numbers' : {
'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
'ENABLE' : 0x20002400,
'DISABLE' : 0x20002401,
}
})
def aarch64_init():
globals().update({
'sc_perf_evt_open' : 241,
'exit_reasons' : aarch64_exit_reasons,
})
def detect_platform():
if os.uname()[4].startswith('ppc'):
ppc_init()
return
elif os.uname()[4].startswith('aarch64'):
aarch64_init()
return
for line in file('/proc/cpuinfo').readlines():
if line.startswith('flags'):
for flag in line.split():
if flag in x86_exit_reasons:
x86_init(flag)
return
elif line.startswith('vendor_id'):
for flag in line.split():
if flag == 'IBM/S390':
s390_init()
return
detect_platform()
def invert(d):
return dict((x[1], x[0]) for x in d.iteritems())
filters = {}
filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons))
if exit_reasons:
filters['kvm_exit'] = ('exit_reason', invert(exit_reasons))
libc = ctypes.CDLL('libc.so.6')
syscall = libc.syscall
get_errno = libc.__errno_location
get_errno.restype = ctypes.POINTER(ctypes.c_int)
class perf_event_attr(ctypes.Structure):
_fields_ = [('type', ctypes.c_uint32),
('size', ctypes.c_uint32),
('config', ctypes.c_uint64),
('sample_freq', ctypes.c_uint64),
('sample_type', ctypes.c_uint64),
('read_format', ctypes.c_uint64),
('flags', ctypes.c_uint64),
('wakeup_events', ctypes.c_uint32),
('bp_type', ctypes.c_uint32),
('bp_addr', ctypes.c_uint64),
('bp_len', ctypes.c_uint64),
]
def _perf_event_open(attr, pid, cpu, group_fd, flags):
return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
ctypes.c_int(cpu), ctypes.c_int(group_fd),
ctypes.c_long(flags))
PERF_TYPE_HARDWARE = 0
PERF_TYPE_SOFTWARE = 1
PERF_TYPE_TRACEPOINT = 2
PERF_TYPE_HW_CACHE = 3
PERF_TYPE_RAW = 4
PERF_TYPE_BREAKPOINT = 5
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10
PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
PERF_FORMAT_ID = 1 << 2
PERF_FORMAT_GROUP = 1 << 3
sys_tracing = '/sys/kernel/debug/tracing'
class Group(object):
def __init__(self, cpu):
self.events = []
self.group_leader = None
self.cpu = cpu
def add_event(self, name, event_set, tracepoint, filter = None):
self.events.append(Event(group = self,
name = name, event_set = event_set,
tracepoint = tracepoint, filter = filter))
if len(self.events) == 1:
self.file = os.fdopen(self.events[0].fd)
def read(self):
bytes = 8 * (1 + len(self.events))
fmt = 'xxxxxxxx' + 'q' * len(self.events)
return dict(zip([event.name for event in self.events],
struct.unpack(fmt, self.file.read(bytes))))
class Event(object):
def __init__(self, group, name, event_set, tracepoint, filter = None):
self.name = name
attr = perf_event_attr()
attr.type = PERF_TYPE_TRACEPOINT
attr.size = ctypes.sizeof(attr)
id_path = os.path.join(sys_tracing, 'events', event_set,
tracepoint, 'id')
id = int(file(id_path).read())
attr.config = id
attr.sample_type = (PERF_SAMPLE_RAW
| PERF_SAMPLE_TIME
| PERF_SAMPLE_CPU)
attr.sample_period = 1
attr.read_format = PERF_FORMAT_GROUP
group_leader = -1
if group.events:
group_leader = group.events[0].fd
fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
if fd == -1:
err = get_errno()[0]
raise Exception('perf_event_open failed, errno = ' + err.__str__())
if filter:
fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
self.fd = fd
def enable(self):
fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0)
def disable(self):
fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0)
def reset(self):
fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0)
class TracepointProvider(object):
def __init__(self):
path = os.path.join(sys_tracing, 'events', 'kvm')
fields = [f
for f in os.listdir(path)
if os.path.isdir(os.path.join(path, f))]
extra = []
for f in fields:
if f in filters:
subfield, values = filters[f]
for name, number in values.iteritems():
extra.append(f + '(' + name + ')')
fields += extra
self._setup(fields)
self.select(fields)
def fields(self):
return self._fields
def _online_cpus(self):
l = []
pattern = r'cpu([0-9]+)'
basedir = '/sys/devices/system/cpu'
for entry in os.listdir(basedir):
match = re.match(pattern, entry)
if not match:
continue
path = os.path.join(basedir, entry, 'online')
if os.path.exists(path) and open(path).read().strip() != '1':
continue
l.append(int(match.group(1)))
return l
def _setup(self, _fields):
self._fields = _fields
cpus = self._online_cpus()
nfiles = len(cpus) * 1000
resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
events = []
self.group_leaders = []
for cpu in cpus:
group = Group(cpu)
for name in _fields:
tracepoint = name
filter = None
m = re.match(r'(.*)\((.*)\)', name)
if m:
tracepoint, sub = m.groups()
filter = '%s==%d\0' % (filters[tracepoint][0],
filters[tracepoint][1][sub])
event = group.add_event(name, event_set = 'kvm',
tracepoint = tracepoint,
filter = filter)
self.group_leaders.append(group)
def select(self, fields):
for group in self.group_leaders:
for event in group.events:
if event.name in fields:
event.reset()
event.enable()
else:
event.disable()
def read(self):
ret = defaultdict(int)
for group in self.group_leaders:
for name, val in group.read().iteritems():
ret[name] += val
return ret
class Stats:
def __init__(self, providers, fields = None):
self.providers = providers
self.fields_filter = fields
self._update()
def _update(self):
def wanted(key):
if not self.fields_filter:
return True
return re.match(self.fields_filter, key) is not None
self.values = dict()
for d in providers:
provider_fields = [key for key in d.fields() if wanted(key)]
for key in provider_fields:
self.values[key] = None
d.select(provider_fields)
def set_fields_filter(self, fields_filter):
self.fields_filter = fields_filter
self._update()
def get(self):
for d in providers:
new = d.read()
for key in d.fields():
oldval = self.values.get(key, (0, 0))
newval = new[key]
newdelta = None
if oldval is not None:
newdelta = newval - oldval[0]
self.values[key] = (newval, newdelta)
return self.values
if not os.access('/sys/kernel/debug', os.F_OK):
print 'Please enable CONFIG_DEBUG_FS in your kernel'
sys.exit(1)
if not os.access('/sys/kernel/debug/kvm', os.F_OK):
print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
print "and ensure the kvm modules are loaded"
sys.exit(1)
label_width = 40
number_width = 10
def tui(screen, stats):
curses.use_default_colors()
curses.noecho()
drilldown = False
fields_filter = stats.fields_filter
def update_drilldown():
if not fields_filter:
if drilldown:
stats.set_fields_filter(None)
else:
stats.set_fields_filter(r'^[^\(]*$')
update_drilldown()
def refresh(sleeptime):
screen.erase()
screen.addstr(0, 0, 'kvm statistics')
screen.addstr(2, 1, 'Event')
screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total')
screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current')
row = 3
s = stats.get()
def sortkey(x):
if s[x][1]:
return (-s[x][1], -s[x][0])
else:
return (0, -s[x][0])
for key in sorted(s.keys(), key = sortkey):
if row >= screen.getmaxyx()[0]:
break
values = s[key]
if not values[0] and not values[1]:
break
col = 1
screen.addstr(row, col, key)
col += label_width
screen.addstr(row, col, '%10d' % (values[0],))
col += number_width
if values[1] is not None:
screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
row += 1
screen.refresh()
sleeptime = 0.25
while True:
refresh(sleeptime)
curses.halfdelay(int(sleeptime * 10))
sleeptime = 3
try:
c = screen.getkey()
if c == 'x':
drilldown = not drilldown
update_drilldown()
if c == 'q':
break
except KeyboardInterrupt:
break
except curses.error:
continue
def batch(stats):
s = stats.get()
time.sleep(1)
s = stats.get()
for key in sorted(s.keys()):
values = s[key]
print '%-22s%10d%10d' % (key, values[0], values[1])
def log(stats):
keys = sorted(stats.get().iterkeys())
def banner():
for k in keys:
print '%10s' % k[0:9],
print
def statline():
s = stats.get()
for k in keys:
print ' %9d' % s[k][1],
print
line = 0
banner_repeat = 20
while True:
time.sleep(1)
if line % banner_repeat == 0:
banner()
statline()
line += 1
options = optparse.OptionParser()
options.add_option('-1', '--once', '--batch',
action = 'store_true',
default = False,
dest = 'once',
help = 'run in batch mode for one second',
)
options.add_option('-l', '--log',
action = 'store_true',
default = False,
dest = 'log',
help = 'run in logging mode (like vmstat)',
)
options.add_option('-t', '--tracepoints',
action = 'store_true',
default = False,
dest = 'tracepoints',
help = 'retrieve statistics from tracepoints',
)
options.add_option('-d', '--debugfs',
action = 'store_true',
default = False,
dest = 'debugfs',
help = 'retrieve statistics from debugfs',
)
options.add_option('-f', '--fields',
action = 'store',
default = None,
dest = 'fields',
help = 'fields to display (regex)',
)
(options, args) = options.parse_args(sys.argv)
providers = []
if options.tracepoints:
providers.append(TracepointProvider())
if options.debugfs:
providers.append(DebugfsProvider())
if len(providers) == 0:
try:
providers = [TracepointProvider()]
except:
providers = [DebugfsProvider()]
stats = Stats(providers, fields = options.fields)
if options.log:
log(stats)
elif not options.once:
curses.wrapper(tui, stats)
else:
batch(stats)