From 03ce574442d2ee82f59a5232a24492ad80858d75 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Tue, 10 Jun 2014 13:02:16 +0300 Subject: Add the vhost-user netdev backend to the command line The supplied chardev id will be inspected for supported options. Only a socket backend, with a set path (i.e. a Unix socket) and optionally the server parameter set, will be allowed. Other options (nowait, telnet) will make the chardev unusable and the netdev will not be initialised. Additional checks for validity: - requires `-numa node,memdev=..` - requires `-device virtio-net-*` The `vhostforce` option is used to force vhost-net when we deal with non-MSIX guests. Signed-off-by: Antonios Motakis Signed-off-by: Nikolay Nikolaev Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Luiz Capitulino Reviewed-by: Eric Blake --- qapi-schema.json | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'qapi-schema.json') diff --git a/qapi-schema.json b/qapi-schema.json index dc2abe479e..f5d89b024c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2068,6 +2068,22 @@ 'ifname': 'str', '*devname': 'str' } } +## +# @NetdevVhostUserOptions +# +# Vhost-user network backend +# +# @chardev: name of a unix socket chardev +# +# @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false). 
+# +# Since 2.1 +## +{ 'type': 'NetdevVhostUserOptions', + 'data': { + 'chardev': 'str', + '*vhostforce': 'bool' } } + ## # @NetClientOptions # @@ -2086,7 +2102,8 @@ 'dump': 'NetdevDumpOptions', 'bridge': 'NetdevBridgeOptions', 'hubport': 'NetdevHubPortOptions', - 'netmap': 'NetdevNetmapOptions' } } + 'netmap': 'NetdevNetmapOptions', + 'vhost-user': 'NetdevVhostUserOptions' } } ## # @NetLegacy -- cgit v1.2.3-55-g7522 From 0042109a6ab629aebfb287ff7aee295f24fad40d Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Wed, 14 May 2014 17:43:08 +0800 Subject: NUMA: convert -numa option to use OptsVisitor Signed-off-by: Wanlong Gao Signed-off-by: Igor Mammedov Tested-by: Eduardo Habkost Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini Signed-off-by: Hu Tao Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/sysemu/sysemu.h | 3 +- numa.c | 145 +++++++++++++++++++++++------------------------- qapi-schema.json | 32 +++++++++++ vl.c | 11 +++- 4 files changed, 114 insertions(+), 77 deletions(-) (limited to 'qapi-schema.json') diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 3a9308b60d..4102be320f 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -148,9 +148,10 @@ typedef struct node_info { DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; -void numa_add(const char *optarg); void set_numa_nodes(void); void set_numa_modes(void); +extern QemuOptsList qemu_numa_opts; +int numa_init_func(QemuOpts *opts, void *opaque); #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/numa.c b/numa.c index f15c4c41c0..6fb0888d60 100644 --- a/numa.c +++ b/numa.c @@ -28,101 +28,96 @@ #include "qom/cpu.h" #include "qemu/error-report.h" #include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */ - -static void numa_node_parse_cpus(int nodenr, const char *cpus) +#include "qapi-visit.h" +#include "qapi/opts-visitor.h" +#include "qapi/dealloc-visitor.h" +#include 
"qapi/qmp/qerror.h" + +QemuOptsList qemu_numa_opts = { + .name = "numa", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head), + .desc = { { 0 } } /* validated with OptsVisitor */ +}; + +static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { - char *endptr; - unsigned long long value, endvalue; + uint16_t nodenr; + uint16List *cpus = NULL; - /* Empty CPU range strings will be considered valid, they will simply - * not set any bit in the CPU bitmap. - */ - if (!*cpus) { - return; - } - - if (parse_uint(cpus, &value, &endptr, 10) < 0) { - goto error; - } - if (*endptr == '-') { - if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { - goto error; - } - } else if (*endptr == '\0') { - endvalue = value; + if (node->has_nodeid) { + nodenr = node->nodeid; } else { - goto error; + nodenr = nb_numa_nodes; } - if (endvalue >= MAX_CPUMASK_BITS) { - endvalue = MAX_CPUMASK_BITS - 1; - fprintf(stderr, - "qemu: NUMA: A max of %d VCPUs are supported\n", - MAX_CPUMASK_BITS); + if (nodenr >= MAX_NODES) { + error_setg(errp, "Max number of NUMA nodes reached: %" + PRIu16 "\n", nodenr); + return; } - if (endvalue < value) { - goto error; + for (cpus = node->cpus; cpus; cpus = cpus->next) { + if (cpus->value > MAX_CPUMASK_BITS) { + error_setg(errp, "CPU number %" PRIu16 " is bigger than %d", + cpus->value, MAX_CPUMASK_BITS); + return; + } + bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); } - bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1); - return; - -error: - fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); - exit(1); + if (node->has_mem) { + uint64_t mem_size = node->mem; + const char *mem_str = qemu_opt_get(opts, "mem"); + /* Fix up legacy suffix-less format */ + if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) { + mem_size <<= 20; + } + numa_info[nodenr].node_mem = mem_size; + } } -void numa_add(const char *optarg) +int numa_init_func(QemuOpts *opts, void *opaque) { - char 
option[128]; - char *endptr; - unsigned long long nodenr; + NumaOptions *object = NULL; + Error *err = NULL; - optarg = get_opt_name(option, 128, optarg, ','); - if (*optarg == ',') { - optarg++; + { + OptsVisitor *ov = opts_visitor_new(opts); + visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err); + opts_visitor_cleanup(ov); } - if (!strcmp(option, "node")) { - if (nb_numa_nodes >= MAX_NODES) { - fprintf(stderr, "qemu: too many NUMA nodes\n"); - exit(1); - } + if (err) { + goto error; + } - if (get_param_value(option, 128, "nodeid", optarg) == 0) { - nodenr = nb_numa_nodes; - } else { - if (parse_uint_full(option, &nodenr, 10) < 0) { - fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); - exit(1); - } + switch (object->kind) { + case NUMA_OPTIONS_KIND_NODE: + numa_node_parse(object->node, opts, &err); + if (err) { + goto error; } + nb_numa_nodes++; + break; + default: + abort(); + } - if (nodenr >= MAX_NODES) { - fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); - exit(1); - } + return 0; - if (get_param_value(option, 128, "mem", optarg) == 0) { - numa_info[nodenr].node_mem = 0; - } else { - int64_t sval; - sval = strtosz(option, &endptr); - if (sval < 0 || *endptr) { - fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); - exit(1); - } - numa_info[nodenr].node_mem = sval; - } - if (get_param_value(option, 128, "cpus", optarg) != 0) { - numa_node_parse_cpus(nodenr, option); - } - nb_numa_nodes++; - } else { - fprintf(stderr, "Invalid -numa option: %s\n", option); - exit(1); +error: + qerror_report_err(err); + error_free(err); + + if (object) { + QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); + visit_type_NumaOptions(qapi_dealloc_get_visitor(dv), + &object, NULL, NULL); + qapi_dealloc_visitor_cleanup(dv); } + + return -1; } void set_numa_nodes(void) diff --git a/qapi-schema.json b/qapi-schema.json index f5d89b024c..e65b7b1489 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3097,3 +3097,35 @@ 'btn' : 
'InputBtnEvent', 'rel' : 'InputMoveEvent', 'abs' : 'InputMoveEvent' } } + +## +# @NumaOptions +# +# A discriminated record of NUMA options. (for OptsVisitor) +# +# Since 2.1 +## +{ 'union': 'NumaOptions', + 'data': { + 'node': 'NumaNodeOptions' }} + +## +# @NumaNodeOptions +# +# Create a guest NUMA node. (for OptsVisitor) +# +# @nodeid: #optional NUMA node ID (increase by 1 from 0 if omitted) +# +# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin +# if omitted) +# +# @mem: #optional memory size of this node (equally divide total memory among +# nodes if omitted) +# +# Since: 2.1 +## +{ 'type': 'NumaNodeOptions', + 'data': { + '*nodeid': 'uint16', + '*cpus': ['uint16'], + '*mem': 'size' }} diff --git a/vl.c b/vl.c index 7af90cd41b..469aa9c00c 100644 --- a/vl.c +++ b/vl.c @@ -2938,6 +2938,7 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_realtime_opts); qemu_add_opts(&qemu_msg_opts); qemu_add_opts(&qemu_name_opts); + qemu_add_opts(&qemu_numa_opts); runstate_init(); @@ -3133,7 +3134,10 @@ int main(int argc, char **argv, char **envp) } break; case QEMU_OPTION_numa: - numa_add(optarg); + opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1); + if (!opts) { + exit(1); + } break; case QEMU_OPTION_display: display_type = select_display(optarg); @@ -4303,6 +4307,11 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); + if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func, + NULL, 1) != 0) { + exit(1); + } + set_numa_nodes(); if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) { -- cgit v1.2.3-55-g7522 From 7febe36f9adbb34756a6a6765a36ea49b6e502ac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 May 2014 17:43:17 +0800 Subject: numa: add -numa node,memdev= option This option provides the infrastructure for binding guest NUMA nodes to host NUMA nodes. 
For example: -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \ -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \ -numa node,nodeid=1,cpus=1,memdev=ram-node1 The option replaces "-numa node,mem=". Signed-off-by: Paolo Bonzini Signed-off-by: Hu Tao Signed-off-by: Michael S. Tsirkin Acked-by: Michael S. Tsirkin MST: conflict resolution --- include/sysemu/sysemu.h | 1 + numa.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++-- qapi-schema.json | 11 ++++++--- qemu-options.hx | 12 ++++++--- vl.c | 4 +-- 5 files changed, 83 insertions(+), 10 deletions(-) (limited to 'qapi-schema.json') diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index c4e1bbd9fd..277230db49 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -149,6 +149,7 @@ extern int nb_numa_nodes; typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + struct HostMemoryBackend *node_memdev; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; void set_numa_nodes(void); diff --git a/numa.c b/numa.c index 8bab784a3e..b24bb9dd9f 100644 --- a/numa.c +++ b/numa.c @@ -33,6 +33,7 @@ #include "qapi/dealloc-visitor.h" #include "qapi/qmp/qerror.h" #include "hw/boards.h" +#include "sysemu/hostmem.h" QemuOptsList qemu_numa_opts = { .name = "numa", @@ -41,6 +42,8 @@ QemuOptsList qemu_numa_opts = { .desc = { { 0 } } /* validated with OptsVisitor */ }; +static int have_memdevs = -1; + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -67,6 +70,20 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); } + if (node->has_mem && node->has_memdev) { + error_setg(errp, "qemu: cannot specify both mem= and memdev=\n"); + return; + } + + if (have_memdevs == -1) { + have_memdevs = node->has_memdev; + } + if (node->has_memdev != have_memdevs) { 
+ error_setg(errp, "qemu: memdev option must be specified for either " + "all or no nodes\n"); + return; + } + if (node->has_mem) { uint64_t mem_size = node->mem; const char *mem_str = qemu_opt_get(opts, "mem"); @@ -76,6 +93,18 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) } numa_info[nodenr].node_mem = mem_size; } + if (node->has_memdev) { + Object *o; + o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL); + if (!o) { + error_setg(errp, "memdev=%s is ambiguous", node->memdev); + return; + } + + object_ref(o); + numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); + } } int numa_init_func(QemuOpts *opts, void *opaque) @@ -195,10 +224,42 @@ void set_numa_modes(void) } } +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + memory_region_init_ram(mr, owner, name, ram_size); + vmstate_register_ram_global(mr); +} + void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, const char *name, uint64_t ram_size) { - memory_region_init_ram(mr, owner, name, ram_size); - vmstate_register_ram_global(mr); + uint64_t addr = 0; + int i; + + if (nb_numa_nodes == 0 || !have_memdevs) { + allocate_system_memory_nonnuma(mr, owner, name, ram_size); + return; + } + + memory_region_init(mr, owner, name, ram_size); + for (i = 0; i < MAX_NODES; i++) { + Error *local_err = NULL; + uint64_t size = numa_info[i].node_mem; + HostMemoryBackend *backend = numa_info[i].node_memdev; + if (!backend) { + continue; + } + MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err); + if (local_err) { + qerror_report_err(local_err); + exit(1); + } + + memory_region_add_subregion(mr, addr, seg); + vmstate_register_ram_global(seg); + addr += size; + } } diff --git a/qapi-schema.json b/qapi-schema.json index e65b7b1489..e05f8ba4f9 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ 
-3119,8 +3119,12 @@ # @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin # if omitted) # -# @mem: #optional memory size of this node (equally divide total memory among -# nodes if omitted) +# @mem: #optional memory size of this node; mutually exclusive with @memdev. +# Equally divide total memory among nodes if both @mem and @memdev are +# omitted. +# +# @memdev: #optional memory backend object. If specified for one node, +# it must be specified for all nodes. # # Since: 2.1 ## @@ -3128,4 +3132,5 @@ 'data': { '*nodeid': 'uint16', '*cpus': ['uint16'], - '*mem': 'size' }} + '*mem': 'size', + '*memdev': 'str' }} diff --git a/qemu-options.hx b/qemu-options.hx index 06111a67cd..ca75760b27 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -95,16 +95,22 @@ specifies the maximum number of hotpluggable CPUs. ETEXI DEF("numa", HAS_ARG, QEMU_OPTION_numa, - "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) + "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n" + "-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) STEXI @item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] +@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] @findex -numa -Simulate a multi node NUMA system. If @samp{mem} +Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev} and @samp{cpus} are omitted, resources are split equally. Also, note that the -@option{numa} option doesn't allocate any of the specified resources. That is, it just assigns existing resources to NUMA nodes. This means that one still has to use the @option{-m}, @option{-smp} options -to allocate RAM and VCPUs respectively. +to allocate RAM and VCPUs respectively, and possibly @option{-object} +to specify the memory backend for the @samp{memdev} suboption. + +@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one +node uses @samp{memdev}, all of them have to use it. 
ETEXI DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, diff --git a/vl.c b/vl.c index 469aa9c00c..f78f6da1c1 100644 --- a/vl.c +++ b/vl.c @@ -3952,6 +3952,8 @@ int main(int argc, char **argv, char **envp) exit(1); } + cpu_exec_init_all(); + current_machine = MACHINE(object_new(object_class_get_name( OBJECT_CLASS(machine_class)))); object_property_add_child(object_get_root(), "machine", @@ -4289,8 +4291,6 @@ int main(int argc, char **argv, char **envp) } } - cpu_exec_init_all(); - blk_mig_init(); ram_mig_init(); -- cgit v1.2.3-55-g7522 From 4cf1b76bf1e2cbb91b1123d47505a6586195800c Mon Sep 17 00:00:00 2001 From: Hu Tao Date: Tue, 10 Jun 2014 19:15:25 +0800 Subject: hostmem: add properties for NUMA memory policy Signed-off-by: Hu Tao [Raise errors on setting properties if !CONFIG_NUMA. Add BUILD_BUG_ON checks. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti Signed-off-by: Hu Tao Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- backends/hostmem.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++- include/sysemu/hostmem.h | 4 ++ qapi-schema.json | 20 +++++++ 3 files changed, 159 insertions(+), 1 deletion(-) (limited to 'qapi-schema.json') diff --git a/backends/hostmem.c b/backends/hostmem.c index ebef6206a8..ca10c51b51 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -10,12 +10,21 @@ * See the COPYING file in the top-level directory. 
*/ #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "qapi/visitor.h" +#include "qapi-types.h" +#include "qapi-visit.h" #include "qapi/qmp/qerror.h" #include "qemu/config-file.h" #include "qom/object_interfaces.h" +#ifdef CONFIG_NUMA +#include +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); +QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); +#endif + static void host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque, const char *name, Error **errp) @@ -53,6 +62,84 @@ out: error_propagate(errp, local_err); } +static void +host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *host_nodes = NULL; + uint16List **node = &host_nodes; + unsigned long value; + + value = find_first_bit(backend->host_nodes, MAX_NODES); + if (value == MAX_NODES) { + return; + } + + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + + do { + value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); + if (value == MAX_NODES) { + break; + } + + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + } while (true); + + visit_type_uint16List(v, &host_nodes, name, errp); +} + +static void +host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ +#ifdef CONFIG_NUMA + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + uint16List *l = NULL; + + visit_type_uint16List(v, &l, name, errp); + + while (l) { + bitmap_set(backend->host_nodes, l->value, 1); + l = l->next; + } +#else + error_setg(errp, "NUMA node binding are not supported by this QEMU"); +#endif +} + +static void +host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + 
HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy = backend->policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); +} + +static void +host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + int policy; + + visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); + backend->policy = policy; + +#ifndef CONFIG_NUMA + if (policy != HOST_MEM_POLICY_DEFAULT) { + error_setg(errp, "NUMA policies are not supported by this QEMU"); + } +#endif +} + static bool host_memory_backend_get_merge(Object *obj, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); @@ -162,6 +249,12 @@ static void host_memory_backend_init(Object *obj) object_property_add(obj, "size", "int", host_memory_backend_get_size, host_memory_backend_set_size, NULL, NULL, NULL); + object_property_add(obj, "host-nodes", "int", + host_memory_backend_get_host_nodes, + host_memory_backend_set_host_nodes, NULL, NULL, NULL); + object_property_add(obj, "policy", "str", + host_memory_backend_get_policy, + host_memory_backend_set_policy, NULL, NULL, NULL); } static void host_memory_backend_finalize(Object *obj) @@ -204,6 +297,47 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (!backend->dump) { qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); } +#ifdef CONFIG_NUMA + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); + /* lastbit == MAX_NODES means maxnode = 0 */ + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); + /* ensure policy won't be ignored in case memory is preallocated + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so + * this doesn't catch hugepage case. */ + unsigned flags = MPOL_MF_STRICT; + + /* check for invalid host-nodes and policies and give more verbose + * error messages than mbind(). 
*/ + if (maxnode && backend->policy == MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be empty for policy default," + " or you should explicitly specify a policy other" + " than default"); + return; + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be set for policy %s", + HostMemPolicy_lookup[backend->policy]); + return; + } + + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 + * as argument to mbind() due to an old Linux bug (feature?) which + * cuts off the last specified node. This means backend->host_nodes + * must have MAX_NODES+1 bits available. + */ + assert(sizeof(backend->host_nodes) >= + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); + assert(maxnode <= MAX_NODES); + if (mbind(ptr, sz, backend->policy, + maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; + } +#endif + /* Preallocate memory after the NUMA policy has been instantiated. + * This is necessary to guarantee memory is allocated with + * specified NUMA policy in place. 
+ */ if (backend->prealloc) { os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); } diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 4cae673c4b..1ce439415d 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -12,10 +12,12 @@ #ifndef QEMU_RAM_H #define QEMU_RAM_H +#include "sysemu/sysemu.h" /* for MAX_NODES */ #include "qom/object.h" #include "qapi/error.h" #include "exec/memory.h" #include "qemu/option.h" +#include "qemu/bitmap.h" #define TYPE_MEMORY_BACKEND "memory-backend" #define MEMORY_BACKEND(obj) \ @@ -54,6 +56,8 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump; bool prealloc, force_prealloc; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); + HostMemPolicy policy; MemoryRegion mr; }; diff --git a/qapi-schema.json b/qapi-schema.json index e05f8ba4f9..10a324a79a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3134,3 +3134,23 @@ '*cpus': ['uint16'], '*mem': 'size', '*memdev': 'str' }} + +## +# @HostMemPolicy +# +# Host memory policy types +# +# @default: restore default policy, remove any nondefault policy +# +# @preferred: set the preferred host nodes for allocation +# +# @bind: a strict policy that restricts memory allocation to the +# host nodes specified +# +# @interleave: memory allocations are interleaved across the set +# of host nodes specified +# +# Since 2.1 +## +{ 'enum': 'HostMemPolicy', + 'data': [ 'default', 'preferred', 'bind', 'interleave' ] } -- cgit v1.2.3-55-g7522 From 76b5d8507d08db18bc0bd3c75349b357a82ff1a2 Mon Sep 17 00:00:00 2001 From: Hu Tao Date: Mon, 16 Jun 2014 18:05:41 +0800 Subject: qmp: add query-memdev Add qmp command query-memdev to query for information of memory devices Signed-off-by: Hu Tao Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- numa.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ qapi-schema.json | 40 +++++++++++++++++++++++++++ qmp-commands.hx | 38 +++++++++++++++++++++++++ 3 files changed, 162 insertions(+) (limited to 'qapi-schema.json') diff --git a/numa.c b/numa.c index 711f6825cb..eef0717785 100644 --- a/numa.c +++ b/numa.c @@ -34,6 +34,7 @@ #include "qapi/qmp/qerror.h" #include "hw/boards.h" #include "sysemu/hostmem.h" +#include "qmp-commands.h" QemuOptsList qemu_numa_opts = { .name = "numa", @@ -283,3 +284,86 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, addr += size; } } + +static int query_memdev(Object *obj, void *opaque) +{ + MemdevList **list = opaque; + Error *err = NULL; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + MemdevList *m = g_malloc0(sizeof(*m)); + + m->value = g_malloc0(sizeof(*m->value)); + + m->value->size = object_property_get_int(obj, "size", + &err); + if (err) { + goto error; + } + + m->value->merge = object_property_get_bool(obj, "merge", + &err); + if (err) { + goto error; + } + + m->value->dump = object_property_get_bool(obj, "dump", + &err); + if (err) { + goto error; + } + + m->value->prealloc = object_property_get_bool(obj, + "prealloc", &err); + if (err) { + goto error; + } + + m->value->policy = object_property_get_enum(obj, + "policy", + HostMemPolicy_lookup, + &err); + if (err) { + goto error; + } + + object_property_get_uint16List(obj, "host-nodes", + &m->value->host_nodes, &err); + if (err) { + goto error; + } + + m->next = *list; + *list = m; + } + + return 0; +error: + return -1; +} + +MemdevList *qmp_query_memdev(Error **errp) +{ + Object *obj; + MemdevList *list = NULL, *m; + + obj = object_resolve_path("/objects", NULL); + if (obj == NULL) { + return NULL; + } + + if (object_child_foreach(obj, query_memdev, &list) != 0) { + goto error; + } + + return list; + +error: + while (list) { + m = list; + list = list->next; + g_free(m->value); + g_free(m); + } + return NULL; +} 
diff --git a/qapi-schema.json b/qapi-schema.json index 10a324a79a..082bc3ab4e 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3154,3 +3154,43 @@ ## { 'enum': 'HostMemPolicy', 'data': [ 'default', 'preferred', 'bind', 'interleave' ] } + +## +# @Memdev: +# +# Information of memory device +# +# @size: memory device size +# +# @merge: enables or disables memory merge support +# +# @dump: includes memory device's memory in a core dump or not +# +# @prealloc: enables or disables memory preallocation +# +# @host-nodes: host nodes for its memory policy +# +# @policy: memory policy of memory device +# +# Since: 2.1 +## + +{ 'type': 'Memdev', + 'data': { + 'size': 'size', + 'merge': 'bool', + 'dump': 'bool', + 'prealloc': 'bool', + 'host-nodes': ['uint16'], + 'policy': 'HostMemPolicy' }} + +## +# @query-memdev: +# +# Returns information for all memory devices. +# +# Returns: a list of @Memdev. +# +# Since: 2.1 +## +{ 'command': 'query-memdev', 'returns': ['Memdev'] } diff --git a/qmp-commands.hx b/qmp-commands.hx index d6bb0f483f..d99f23591a 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3573,4 +3573,42 @@ Example: } } } ] } +EQMP + + { + .name = "query-memdev", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memdev, + }, + +SQMP +query-memdev +------------ + +Show memory devices information. + + +Example (1): + +-> { "execute": "query-memdev" } +<- { "return": [ + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": false, + "host-nodes": [0, 1], + "policy": "bind" + }, + { + "size": 536870912, + "merge": false, + "dump": true, + "prealloc": true, + "host-nodes": [2, 3], + "policy": "preferred" + } + ] + } + EQMP -- cgit v1.2.3-55-g7522 From 6f2e27301d8cfea0a40dda7ec8a6dc1b9d0e228e Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 16 Jun 2014 19:12:25 +0200 Subject: qmp: add query-memory-devices command ... allowing to get state of present memory devices. Currently implemented only for PCDIMMDevice. 
Signed-off-by: Igor Mammedov Reviewed-by: Eric Blake Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 39 +++++++++++++++++++++++++++++++ include/hw/mem/pc-dimm.h | 2 ++ qapi-schema.json | 51 +++++++++++++++++++++++++++++++++++++++++ qmp-commands.hx | 27 ++++++++++++++++++++++ qmp.c | 11 +++++++++ stubs/Makefile.objs | 1 + stubs/qmp_pc_dimm_device_list.c | 7 ++++++ 7 files changed, 138 insertions(+) create mode 100644 stubs/qmp_pc_dimm_device_list.c (limited to 'qapi-schema.json') diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 8c2656821a..ad176b700b 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -23,6 +23,45 @@ #include "qapi/visitor.h" #include "qemu/range.h" +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + MemoryDeviceInfoList ***prev = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + + if (dev->realized) { + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + DeviceClass *dc = DEVICE_GET_CLASS(obj); + PCDIMMDevice *dimm = PC_DIMM(obj); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + info->dimm = di; + elem->value = info; + elem->next = NULL; + **prev = elem; + *prev = &elem->next; + } + } + + object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); + return 0; +} + static int pc_dimm_slot2bitmap(Object *obj, void *opaque) { unsigned long *bitmap = opaque; diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 0f4a6ba1e4..761eeef801 100644 --- a/include/hw/mem/pc-dimm.h +++ 
b/include/hw/mem/pc-dimm.h @@ -76,4 +76,6 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, Error **errp); int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); + +int qmp_pc_dimm_device_list(Object *obj, void *opaque); #endif diff --git a/qapi-schema.json b/qapi-schema.json index 082bc3ab4e..d51a208c05 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3194,3 +3194,54 @@ # Since: 2.1 ## { 'command': 'query-memdev', 'returns': ['Memdev'] } +# @PCDIMMDeviceInfo: +# +# PCDIMMDevice state information +# +# @id: #optional device's ID +# +# @addr: physical address, where device is mapped +# +# @size: size of memory that the device provides +# +# @slot: slot number at which device is plugged in +# +# @node: NUMA node number where device is plugged in +# +# @memdev: memory backend linked with device +# +# @hotplugged: true if device was hotplugged +# +# @hotpluggable: true if device could be added/removed while machine is running +# +# Since: 2.1 +## +{ 'type': 'PCDIMMDeviceInfo', + 'data': { '*id': 'str', + 'addr': 'int', + 'size': 'int', + 'slot': 'int', + 'node': 'int', + 'memdev': 'str', + 'hotplugged': 'bool', + 'hotpluggable': 'bool' + } +} + +## +# @MemoryDeviceInfo: +# +# Union containing information about a memory device +# +# Since: 2.1 +## +{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } + +## +# @query-memory-devices +# +# Lists available memory devices and their state +# +# Since: 2.1 +## +{ 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] } diff --git a/qmp-commands.hx b/qmp-commands.hx index e47c3ea900..81054d0b1b 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3611,4 +3611,31 @@ Example (1): ] } +EQMP + + { + .name = "query-memory-devices", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_memory_devices, + }, + +SQMP +@query-memory-devices +-------------------- + +Return a list of memory devices. 
+ +Example: +-> { "execute": "query-memory-devices" } +<- { "return": [ { "data": + { "addr": 5368709120, + "hotpluggable": true, + "hotplugged": true, + "id": "d1", + "memdev": "/objects/memX", + "node": 0, + "size": 1073741824, + "slot": 0}, + "type": "dimm" + } ] } EQMP diff --git a/qmp.c b/qmp.c index c3c0229cdf..835fd78851 100644 --- a/qmp.c +++ b/qmp.c @@ -28,6 +28,7 @@ #include "qapi/qmp-input-visitor.h" #include "hw/boards.h" #include "qom/object_interfaces.h" +#include "hw/mem/pc-dimm.h" NameInfo *qmp_query_name(Error **errp) { @@ -628,3 +629,13 @@ void qmp_object_del(const char *id, Error **errp) } object_unparent(obj); } + +MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) +{ + MemoryDeviceInfoList *head = NULL; + MemoryDeviceInfoList **prev = &head; + + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + + return head; +} diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 5a0b9174e0..997d68d5b9 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -37,3 +37,4 @@ stub-obj-y += vmstate.o stub-obj-$(CONFIG_WIN32) += fd-register.o stub-obj-y += cpus.o stub-obj-y += kvm.o +stub-obj-y += qmp_pc_dimm_device_list.o diff --git a/stubs/qmp_pc_dimm_device_list.c b/stubs/qmp_pc_dimm_device_list.c new file mode 100644 index 0000000000..5cb220c66c --- /dev/null +++ b/stubs/qmp_pc_dimm_device_list.c @@ -0,0 +1,7 @@ +#include "qom/object.h" +#include "hw/mem/pc-dimm.h" + +int qmp_pc_dimm_device_list(Object *obj, void *opaque) +{ + return 0; +} -- cgit v1.2.3-55-g7522 From 521b3673ac16ec7fa33b1ec2cdfe75ec708f073e Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 16 Jun 2014 19:12:26 +0200 Subject: acpi: introduce TYPE_ACPI_DEVICE_IF interface ... it will be used to abstract generic ACPI bits from device that implements ACPI interface. ACPIOSTInfo type is used for passing-through raw _OST event/status codes reported by guest OS to a management layer. 
It lets management tools interpret the values as specified by the ACPI spec if they are interested in them. QEMU doesn't encode these values as an enum, since it doesn't need to handle them, and this allows the interface to scale well without any changes in QEMU while the guest OS and management evolve over time. Signed-off-by: Igor Mammedov Reviewed-by: Eric Blake Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/Makefile.objs | 1 + hw/acpi/acpi_interface.c | 15 +++++++++++++ include/hw/acpi/acpi_dev_interface.h | 43 ++++++++++++++++++++++++++++++++++++ qapi-schema.json | 31 ++++++++++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 hw/acpi/acpi_interface.c create mode 100644 include/hw/acpi/acpi_dev_interface.h (limited to 'qapi-schema.json') diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 004e1b2090..acd2389431 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1,2 +1,3 @@ common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o pcihp.o cpu_hotplug.o common-obj-$(CONFIG_ACPI) += memory_hotplug.o +common-obj-$(CONFIG_ACPI) += acpi_interface.o diff --git a/hw/acpi/acpi_interface.c b/hw/acpi/acpi_interface.c new file mode 100644 index 0000000000..c181bb2262 --- /dev/null +++ b/hw/acpi/acpi_interface.c @@ -0,0 +1,15 @@ +#include "hw/acpi/acpi_dev_interface.h" +#include "qemu/module.h" + +static void register_types(void) +{ + static const TypeInfo acpi_dev_if_info = { + .name = TYPE_ACPI_DEVICE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(AcpiDeviceIfClass), + }; + + type_register_static(&acpi_dev_if_info); +} + +type_init(register_types) diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h new file mode 100644 index 0000000000..f245f8d236 --- /dev/null +++ b/include/hw/acpi/acpi_dev_interface.h @@ -0,0 +1,43 @@ +#ifndef ACPI_DEV_INTERFACE_H +#define ACPI_DEV_INTERFACE_H + +#include "qom/object.h" +#include "qapi-types.h" + +#define TYPE_ACPI_DEVICE_IF "acpi-device-interface" +
+#define ACPI_DEVICE_IF_CLASS(klass) \ + OBJECT_CLASS_CHECK(AcpiDeviceIfClass, (klass), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AcpiDeviceIfClass, (obj), \ + TYPE_ACPI_DEVICE_IF) +#define ACPI_DEVICE_IF(obj) \ + INTERFACE_CHECK(AcpiDeviceIf, (obj), \ + TYPE_ACPI_DEVICE_IF) + + +typedef struct AcpiDeviceIf { + /* <private> */ + Object Parent; +} AcpiDeviceIf; + +/** + * AcpiDeviceIfClass: + * + * ospm_status: returns status of ACPI device objects, reported + * via _OST method if device supports it. + * + * Interface is designed for providing unified interface + * to generic ACPI functionality that could be used without + * knowledge about internals of actual device that implements + * ACPI interface. + */ +typedef struct AcpiDeviceIfClass { + /* <private> */ + InterfaceClass parent_class; + + /* <public> */ + void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); +} AcpiDeviceIfClass; +#endif diff --git a/qapi-schema.json b/qapi-schema.json index d51a208c05..ff30ace82b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3245,3 +3245,34 @@ # Since: 2.1 ## { 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] } + +## @ACPISlotType +# +# @DIMM: memory slot +# +{ 'enum': 'ACPISlotType', 'data': [ 'DIMM' ] } + +## @ACPIOSTInfo +# +# OSPM Status Indication for a device +# For description of possible values of @source and @status fields +# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec.
+# +# @device: #optional device ID associated with slot +# +# @slot: slot ID, unique per slot of a given @slot-type +# +# @slot-type: type of the slot +# +# @source: an integer containing the source event +# +# @status: an integer containing the status code +# +# Since: 2.1 +## +{ 'type': 'ACPIOSTInfo', + 'data' : { '*device': 'str', + 'slot': 'str', + 'slot-type': 'ACPISlotType', + 'source': 'int', + 'status': 'int' } } -- cgit v1.2.3-55-g7522 From 02419bcb3f896fc42b50b2b04e2938365b8f7350 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 16 Jun 2014 19:12:28 +0200 Subject: qmp: add query-acpi-ospm-status command ... to get ACPI OSPM status reported by ACPI devices via _OST method. Signed-off-by: Igor Mammedov Reviewed-by: Eric Blake Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi-schema.json | 10 ++++++++++ qmp-commands.hx | 22 ++++++++++++++++++++++ qmp.c | 20 ++++++++++++++++++++ 3 files changed, 52 insertions(+) (limited to 'qapi-schema.json') diff --git a/qapi-schema.json b/qapi-schema.json index ff30ace82b..98350048f6 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3276,3 +3276,13 @@ 'slot-type': 'ACPISlotType', 'source': 'int', 'status': 'int' } } + +## +# @query-acpi-ospm-status +# +# Lists ACPI OSPM status of ACPI device objects, +# which might be reported via _OST method +# +# Since: 2.1 +## +{ 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] } diff --git a/qmp-commands.hx b/qmp-commands.hx index 81054d0b1b..e4a1c80434 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3639,3 +3639,25 @@ Example: "type": "dimm" } ] } EQMP + + { + .name = "query-acpi-ospm-status", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_acpi_ospm_status, + }, + +SQMP +@query-acpi-ospm-status +-------------------- + +Return list of ACPIOSTInfo for devices that support status reporting +via ACPI _OST method. 
+ +Example: +-> { "execute": "query-acpi-ospm-status" } +<- { "return": [ { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0}, + { "slot": "1", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0}, + { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0} + ]} +EQMP diff --git a/qmp.c b/qmp.c index 835fd78851..dca6efb7b8 100644 --- a/qmp.c +++ b/qmp.c @@ -29,6 +29,7 @@ #include "hw/boards.h" #include "qom/object_interfaces.h" #include "hw/mem/pc-dimm.h" +#include "hw/acpi/acpi_dev_interface.h" NameInfo *qmp_query_name(Error **errp) { @@ -639,3 +640,22 @@ MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) return head; } + +ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp) +{ + bool ambig; + ACPIOSTInfoList *head = NULL; + ACPIOSTInfoList **prev = &head; + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig); + + if (obj) { + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj); + + adevc->ospm_status(adev, &prev); + } else { + error_setg(errp, "command is not supported, missing ACPI device"); + } + + return head; +} -- cgit v1.2.3-55-g7522