From 127833eeea798b0863806341893cf575d2d29cf2 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 7 Mar 2018 22:25:40 -0500 Subject: virtio-net: use 64-bit values for feature flags In prepartion for using some of the high order feature bits, make sure that virtio-net uses 64-bit values everywhere. Signed-off-by: Jason Baron Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtio-dev@lists.oasis-open.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 55 +++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'hw') diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 188744e17d..ab06f933bc 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -48,18 +48,18 @@ (offsetof(container, field) + sizeof(((container *)0)->field)) typedef struct VirtIOFeature { - uint32_t flags; + uint64_t flags; size_t end; } VirtIOFeature; static VirtIOFeature feature_sizes[] = { - {.flags = 1 << VIRTIO_NET_F_MAC, + {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, - {.flags = 1 << VIRTIO_NET_F_STATUS, + {.flags = 1ULL << VIRTIO_NET_F_STATUS, .end = endof(struct virtio_net_config, status)}, - {.flags = 1 << VIRTIO_NET_F_MQ, + {.flags = 1ULL << VIRTIO_NET_F_MQ, .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, - {.flags = 1 << VIRTIO_NET_F_MTU, + {.flags = 1ULL << VIRTIO_NET_F_MTU, .end = endof(struct virtio_net_config, mtu)}, {} }; @@ -1938,7 +1938,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) int i; if (n->net_conf.mtu) { - n->host_features |= (0x1 << VIRTIO_NET_F_MTU); + n->host_features |= (1ULL << VIRTIO_NET_F_MTU); } virtio_net_set_config_size(n, n->host_features); @@ -2109,45 +2109,46 @@ static const VMStateDescription vmstate_virtio_net = { }; static Property virtio_net_properties[] = { - DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), - DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, + DEFINE_PROP_BIT64("csum", VirtIONet, host_features, + VIRTIO_NET_F_CSUM, true), + DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, VIRTIO_NET_F_GUEST_CSUM, true), - DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), - DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features, + DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), + DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, VIRTIO_NET_F_GUEST_TSO4, true), - DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, VIRTIO_NET_F_GUEST_TSO6, true), - DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, VIRTIO_NET_F_GUEST_ECN, true), - DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, VIRTIO_NET_F_GUEST_UFO, true), - DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, VIRTIO_NET_F_GUEST_ANNOUNCE, true), - DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, VIRTIO_NET_F_HOST_TSO4, true), - DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, VIRTIO_NET_F_HOST_TSO6, true), - DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, VIRTIO_NET_F_HOST_ECN, true), - DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, VIRTIO_NET_F_HOST_UFO, true), - DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features, + DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, VIRTIO_NET_F_MRG_RXBUF, true), - DEFINE_PROP_BIT("status", VirtIONet, host_features, + DEFINE_PROP_BIT64("status", VirtIONet, host_features, VIRTIO_NET_F_STATUS, true), - DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, VIRTIO_NET_F_CTRL_VQ, true), - DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, VIRTIO_NET_F_CTRL_RX, true), - DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, VIRTIO_NET_F_CTRL_VLAN, true), - DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, VIRTIO_NET_F_CTRL_RX_EXTRA, true), - DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, VIRTIO_NET_F_CTRL_MAC_ADDR, true), - DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), - DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), + DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, TX_TIMER_INTERVAL), -- cgit v1.2.3-55-g7522 From 9473939ed7addcaaeb8fde5c093918fb7fa0919c Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 7 Mar 2018 22:25:41 -0500 Subject: virtio-net: add linkspeed and duplex settings to virtio-net Although linkspeed and duplex can be set in a linux guest via 'ethtool -s', this requires custom ethtool commands for virtio-net by default. Introduce a new feature flag, VIRTIO_NET_F_SPEED_DUPLEX, which allows the hypervisor to export a linkspeed and duplex setting. The user can subsequently overwrite it later if desired via: 'ethtool -s'. Linkspeed and duplex settings can be set as: '-device virtio-net,speed=10000,duplex=full' where speed is [0...INT_MAX], and duplex is ["half"|"full"]. Signed-off-by: Jason Baron Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtio-dev@lists.oasis-open.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 26 ++++++++++++++++++++++++++ include/hw/virtio/virtio-net.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'hw') diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index ab06f933bc..67ad38cfe4 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -26,6 +26,7 @@ #include "qapi/qapi-events-net.h" #include "hw/virtio/virtio-access.h" #include "migration/misc.h" +#include "standard-headers/linux/ethtool.h" #define VIRTIO_NET_VM_VERSION 11 @@ -61,6 +62,8 @@ static VirtIOFeature feature_sizes[] = { .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, {.flags = 1ULL << VIRTIO_NET_F_MTU, .end = endof(struct virtio_net_config, mtu)}, + {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, + .end = endof(struct virtio_net_config, duplex)}, {} }; @@ -89,6 +92,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); memcpy(netcfg.mac, n->mac, ETH_ALEN); + virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); + netcfg.duplex = n->net_conf.duplex; memcpy(config, &netcfg, n->config_size); } @@ -1941,6 +1946,25 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->host_features |= (1ULL << VIRTIO_NET_F_MTU); } + if (n->net_conf.duplex_str) { + if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { + n->net_conf.duplex = DUPLEX_HALF; + } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { + n->net_conf.duplex = DUPLEX_FULL; + } else { + error_setg(errp, "'duplex' must be 'half' or 'full'"); + } + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); + } else { + n->net_conf.duplex = DUPLEX_UNKNOWN; + } + + if (n->net_conf.speed < SPEED_UNKNOWN) { + error_setg(errp, "'speed' must be between 0 and INT_MAX"); + } else if (n->net_conf.speed >= 0) { + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); + } + virtio_net_set_config_size(n, n->host_features); virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); @@ -2161,6 +2185,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, true), + DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), + DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index e7634c9044..02484dc94c 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -38,6 +38,9 @@ typedef struct virtio_net_conf uint16_t rx_queue_size; uint16_t tx_queue_size; uint16_t mtu; + int32_t speed; + char *duplex_str; + uint8_t duplex; } virtio_net_conf; /* Maximum packet size we can receive from tap device: header + 64k */ -- cgit v1.2.3-55-g7522 From 697155cdf13f63001ded45664f3ca093630f1f69 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:47 +0100 Subject: pc: replace pm object initialization with one-liner in acpi_get_pm_info() next patch will need it before it gets to piix4/lpc branches that initializes 'obj' now. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Reviewed-by: Philippe Mathieu-Daudé Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index deb440f286..b85fefe931 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -128,7 +128,7 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) { Object *piix = piix4_pm_find(); Object *lpc = ich9_lpc_find(); - Object *obj = NULL; + Object *obj = piix ? piix : lpc; QObject *o; pm->force_rev1_fadt = false; @@ -138,7 +138,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) if (piix) { /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ pm->force_rev1_fadt = true; - obj = piix; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; pm->pcihp_io_base = object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); @@ -146,7 +145,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { - obj = lpc; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; } assert(obj); -- cgit v1.2.3-55-g7522 From d0384d9020f1f52f90e5e5b1b6c9c04aac5f6756 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:49 +0100 Subject: acpi: add build_append_gas() helper for Generic Address Structure it will help to add Generic Address Structure to ACPI tables without using packed C structures and avoid endianness issues as API doesn't need an explicit conversion. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 16 ++++++++++++++++ include/hw/acpi/aml-build.h | 20 ++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'hw') diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 36a6cc450e..3fef5f6abf 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -258,6 +258,22 @@ static void build_append_int(GArray *table, uint64_t value) } } +/* Generic Address Structure (GAS) + * ACPI 2.0/3.0: 5.2.3.1 Generic Address Structure + * 2.0 compat note: + * @access_width must be 0, see ACPI 2.0:Table 5-1 + */ +void build_append_gas(GArray *table, AmlAddressSpace as, + uint8_t bit_width, uint8_t bit_offset, + uint8_t access_width, uint64_t address) +{ + build_append_int_noprefix(table, as, 1); + build_append_int_noprefix(table, bit_width, 1); + build_append_int_noprefix(table, bit_offset, 1); + build_append_int_noprefix(table, access_width, 1); + build_append_int_noprefix(table, address, 8); +} + /* * Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword, * and return the offset to 0x00000000 for runtime patching. diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 88d0738d76..8692ccc047 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -77,6 +77,15 @@ typedef enum { AML_WRITE_AS_ZEROS = 2, } AmlUpdateRule; +typedef enum { + AML_AS_SYSTEM_MEMORY = 0X00, + AML_AS_SYSTEM_IO = 0X01, + AML_AS_PCI_CONFIG = 0X02, + AML_AS_EMBEDDED_CTRL = 0X03, + AML_AS_SMBUS = 0X04, + AML_AS_FFH = 0X7F, +} AmlAddressSpace; + typedef enum { AML_SYSTEM_MEMORY = 0X00, AML_SYSTEM_IO = 0X01, @@ -389,6 +398,17 @@ int build_append_named_dword(GArray *array, const char *name_format, ...) GCC_FMT_ATTR(2, 3); +void build_append_gas(GArray *table, AmlAddressSpace as, + uint8_t bit_width, uint8_t bit_offset, + uint8_t access_width, uint64_t address); + +static inline void +build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) +{ + build_append_gas(table, s->space_id, s->bit_width, s->bit_offset, + s->access_width, s->address); +} + void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); -- cgit v1.2.3-55-g7522 From f8eaef67a3946cd483cbb77c50cf2ded55c0ddde Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:50 +0100 Subject: acpi: move ACPI_PORT_SMI_CMD define to header it belongs to ACPI_PORT_SMI_CMD is alias for APM_CNT_IOPORT, so make it really one instead of duplicating its value. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 2 -- hw/isa/apm.c | 1 - include/hw/isa/apm.h | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b85fefe931..699f3a005f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -255,8 +255,6 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64) NULL)); } -#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ - static void acpi_align_size(GArray *blob, unsigned align) { /* Align size to multiple of given size. This reduces the chance diff --git a/hw/isa/apm.c b/hw/isa/apm.c index e232b0da03..c3101ef52f 100644 --- a/hw/isa/apm.c +++ b/hw/isa/apm.c @@ -34,7 +34,6 @@ #endif /* fixed I/O location */ -#define APM_CNT_IOPORT 0xb2 #define APM_STS_IOPORT 0xb3 static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val, diff --git a/include/hw/isa/apm.h b/include/hw/isa/apm.h index 4839ff1df2..b7098bf7ca 100644 --- a/include/hw/isa/apm.h +++ b/include/hw/isa/apm.h @@ -5,6 +5,9 @@ #include "hw/hw.h" #include "exec/memory.h" +#define APM_CNT_IOPORT 0xb2 +#define ACPI_PORT_SMI_CMD APM_CNT_IOPORT + typedef void (*apm_ctrl_changed_t)(uint32_t val, void *arg); typedef struct APMState { -- cgit v1.2.3-55-g7522 From 937d1b58714b4002dd0fb7c3573c42b1c017e61b Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:51 +0100 Subject: pc: acpi: isolate FADT specific data into AcpiFadtData structure move FADT data initialization out of fadt_setup() into dedicated init_fadt_data() that will set common for pc/q35 values in AcpiFadtData structure and acpi_get_pm_info() will complement it with pc/q35 specific values initialization. That will allow to get rid of fadt_setup() and generalize build_fadt() so it could be easily extended for rev5 and reused by ARM target. While at it also move facs/dsdt/xdsdt offsets from build_fadt() arg list into AcpiFadtData, as they belong to the same dataset. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 190 ++++++++++++++++++++++++-------------------- include/hw/acpi/acpi-defs.h | 28 +++++++ 2 files changed, 130 insertions(+), 88 deletions(-) (limited to 'hw') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 699f3a005f..1f88ed1a11 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -91,17 +91,11 @@ typedef struct AcpiMcfgInfo { } AcpiMcfgInfo; typedef struct AcpiPmInfo { - bool force_rev1_fadt; bool s3_disabled; bool s4_disabled; bool pcihp_bridge_en; uint8_t s4_val; - uint16_t sci_int; - uint8_t acpi_enable_cmd; - uint8_t acpi_disable_cmd; - uint32_t gpe0_blk; - uint32_t gpe0_blk_len; - uint32_t io_base; + AcpiFadtData fadt; uint16_t cpu_hp_io_base; uint16_t pcihp_io_base; uint16_t pcihp_io_len; @@ -124,20 +118,59 @@ typedef struct AcpiBuildPciBusHotplugState { bool pcihp_bridge_en; } AcpiBuildPciBusHotplugState; +static void init_common_fadt_data(Object *o, AcpiFadtData *data) +{ + uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL); + AmlAddressSpace as = AML_AS_SYSTEM_IO; + AcpiFadtData fadt = { + .rev = 3, + .flags = + (1 << ACPI_FADT_F_WBINVD) | + (1 << ACPI_FADT_F_PROC_C1) | + (1 << ACPI_FADT_F_SLP_BUTTON) | + (1 << ACPI_FADT_F_RTC_S4) | + (1 << ACPI_FADT_F_USE_PLATFORM_CLOCK) | + /* APIC destination mode ("Flat Logical") has an upper limit of 8 + * CPUs for more than 8 CPUs, "Clustered Logical" mode has to be + * used + */ + ((max_cpus > 8) ? (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0), + .int_model = 1 /* Multiple APIC */, + .rtc_century = RTC_CENTURY, + .plvl2_lat = 0xfff /* C2 state not supported */, + .plvl3_lat = 0xfff /* C3 state not supported */, + .smi_cmd = ACPI_PORT_SMI_CMD, + .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL), + .acpi_enable_cmd = + object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL), + .acpi_disable_cmd = + object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL), + .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io }, + .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8, + .address = io + 0x04 }, + .pm_tmr = { .space_id = as, .bit_width = 4 * 8, .address = io + 0x08 }, + .gpe0_blk = { .space_id = as, .bit_width = + object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK_LEN, NULL) * 8, + .address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL) + }, + }; + *data = fadt; +} + static void acpi_get_pm_info(AcpiPmInfo *pm) { Object *piix = piix4_pm_find(); Object *lpc = ich9_lpc_find(); Object *obj = piix ? piix : lpc; QObject *o; - - pm->force_rev1_fadt = false; pm->cpu_hp_io_base = 0; pm->pcihp_io_base = 0; pm->pcihp_io_len = 0; + + init_common_fadt_data(obj, &pm->fadt); if (piix) { /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ - pm->force_rev1_fadt = true; + pm->fadt.rev = 1; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; pm->pcihp_io_base = object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); @@ -145,10 +178,19 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { + struct AcpiGenericAddress r = { .space_id = AML_AS_SYSTEM_IO, + .bit_width = 8, .address = ICH9_RST_CNT_IOPORT }; + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; } assert(obj); + /* The above need not be conditional on machine type because the reset port + * happens to be the same on PIIX (pc) and ICH9 (q35). */ + QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); + /* Fill in optional s3/s4 related properties */ o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); if (o) { @@ -172,22 +214,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) } qobject_decref(o); - /* Fill in mandatory properties */ - pm->sci_int = object_property_get_uint(obj, ACPI_PM_PROP_SCI_INT, NULL); - - pm->acpi_enable_cmd = object_property_get_uint(obj, - ACPI_PM_PROP_ACPI_ENABLE_CMD, - NULL); - pm->acpi_disable_cmd = - object_property_get_uint(obj, - ACPI_PM_PROP_ACPI_DISABLE_CMD, - NULL); - pm->io_base = object_property_get_uint(obj, ACPI_PM_PROP_PM_IO_BASE, - NULL); - pm->gpe0_blk = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK, - NULL); - pm->gpe0_blk_len = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK_LEN, - NULL); pm->pcihp_bridge_en = object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", NULL); @@ -273,73 +299,53 @@ build_facs(GArray *table_data, BIOSLinker *linker) } /* Load chipset information in FADT */ -static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiPmInfo *pm) +static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiFadtData f) { - fadt->model = 1; + fadt->model = f.int_model; fadt->reserved1 = 0; - fadt->sci_int = cpu_to_le16(pm->sci_int); - fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD); - fadt->acpi_enable = pm->acpi_enable_cmd; - fadt->acpi_disable = pm->acpi_disable_cmd; + fadt->sci_int = cpu_to_le16(f.sci_int); + fadt->smi_cmd = cpu_to_le32(f.smi_cmd); + fadt->acpi_enable = f.acpi_enable_cmd; + fadt->acpi_disable = f.acpi_disable_cmd; /* EVT, CNT, TMR offset matches hw/acpi/core.c */ - fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base); - fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04); - fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08); - fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk); + fadt->pm1a_evt_blk = cpu_to_le32(f.pm1a_evt.address); + fadt->pm1a_cnt_blk = cpu_to_le32(f.pm1a_cnt.address); + fadt->pm_tmr_blk = cpu_to_le32(f.pm_tmr.address); + fadt->gpe0_blk = cpu_to_le32(f.gpe0_blk.address); /* EVT, CNT, TMR length matches hw/acpi/core.c */ - fadt->pm1_evt_len = 4; - fadt->pm1_cnt_len = 2; - fadt->pm_tmr_len = 4; - fadt->gpe0_blk_len = pm->gpe0_blk_len; - fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */ - fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */ - fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) | - (1 << ACPI_FADT_F_PROC_C1) | - (1 << ACPI_FADT_F_SLP_BUTTON) | - (1 << ACPI_FADT_F_RTC_S4)); - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK); - /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs - * For more than 8 CPUs, "Clustered Logical" mode has to be used - */ - if (max_cpus > 8) { - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL); - } - fadt->century = RTC_CENTURY; - if (pm->force_rev1_fadt) { + fadt->pm1_evt_len = f.pm1a_evt.bit_width / 8; + fadt->pm1_cnt_len = f.pm1a_cnt.bit_width / 8; + fadt->pm_tmr_len = f.pm_tmr.bit_width / 8; + fadt->gpe0_blk_len = f.gpe0_blk.bit_width / 8; + fadt->plvl2_lat = cpu_to_le16(f.plvl2_lat); + fadt->plvl3_lat = cpu_to_le16(f.plvl3_lat); + fadt->flags = cpu_to_le32(f.flags); + fadt->century = f.rtc_century; + if (f.rev == 1) { return; } - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_RESET_REG_SUP); - fadt->reset_value = 0xf; - fadt->reset_register.space_id = AML_SYSTEM_IO; - fadt->reset_register.bit_width = 8; - fadt->reset_register.address = cpu_to_le64(ICH9_RST_CNT_IOPORT); - /* The above need not be conditional on machine type because the reset port - * happens to be the same on PIIX (pc) and ICH9 (q35). */ - QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); + fadt->reset_value = f.reset_val; + fadt->reset_register = f.reset_reg; + fadt->reset_register.address = cpu_to_le64(f.reset_reg.address); - fadt->xpm1a_event_block.space_id = AML_SYSTEM_IO; - fadt->xpm1a_event_block.bit_width = fadt->pm1_evt_len * 8; - fadt->xpm1a_event_block.address = cpu_to_le64(pm->io_base); + fadt->xpm1a_event_block = f.pm1a_evt; + fadt->xpm1a_event_block.address = cpu_to_le64(f.pm1a_evt.address); - fadt->xpm1a_control_block.space_id = AML_SYSTEM_IO; - fadt->xpm1a_control_block.bit_width = fadt->pm1_cnt_len * 8; - fadt->xpm1a_control_block.address = cpu_to_le64(pm->io_base + 0x4); + fadt->xpm1a_control_block = f.pm1a_cnt; + fadt->xpm1a_control_block.address = cpu_to_le64(f.pm1a_cnt.address); - fadt->xpm_timer_block.space_id = AML_SYSTEM_IO; - fadt->xpm_timer_block.bit_width = fadt->pm_tmr_len * 8; - fadt->xpm_timer_block.address = cpu_to_le64(pm->io_base + 0x8); + fadt->xpm_timer_block = f.pm_tmr; + fadt->xpm_timer_block.address = cpu_to_le64(f.pm_tmr.address); - fadt->xgpe0_block.space_id = AML_SYSTEM_IO; - fadt->xgpe0_block.bit_width = pm->gpe0_blk_len * 8; - fadt->xgpe0_block.address = cpu_to_le64(pm->gpe0_blk); + fadt->xgpe0_block = f.gpe0_blk; + fadt->xgpe0_block.address = cpu_to_le64(f.gpe0_blk.address); } /* FADT */ static void -build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, - unsigned facs_tbl_offset, unsigned dsdt_tbl_offset, +build_fadt(GArray *table_data, BIOSLinker *linker, AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt)); @@ -347,29 +353,29 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; int fadt_size = sizeof(*fadt); - int rev = 3; /* FACS address to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl), - ACPI_BUILD_TABLE_FILE, facs_tbl_offset); + ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); /* DSDT address to be filled by Guest linker */ - fadt_setup(fadt, pm); + fadt_setup(fadt, *f); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); - if (pm->force_rev1_fadt) { - rev = 1; + ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); + + if (f->rev == 1) { fadt_size = offsetof(typeof(*fadt), reset_register); - } else { + } else if (f->xdsdt_tbl_offset) { bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); + ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); } build_header(linker, table_data, - (void *)fadt, "FACP", fadt_size, rev, oem_id, oem_table_id); + (void *)fadt, "FACP", fadt_size, f->rev, + oem_id, oem_table_id); } void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, @@ -2049,7 +2055,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); crs = aml_resource_template(); aml_append(crs, - aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len) + aml_io( + AML_DECODE16, + pm->fadt.gpe0_blk.address, + pm->fadt.gpe0_blk.address, + 1, + pm->fadt.gpe0_blk.bit_width / 8) ); aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); @@ -2696,7 +2707,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) /* ACPI tables pointed to by RSDT */ fadt = tables_blob->len; acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, &pm, facs, dsdt, + pm.fadt.facs_tbl_offset = &facs; + pm.fadt.dsdt_tbl_offset = &dsdt; + pm.fadt.xdsdt_tbl_offset = &dsdt; + build_fadt(tables_blob, tables->linker, &pm.fadt, slic_oem.id, slic_oem.table_id); aml_len += tables_blob->len - fadt; diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 9942bc5757..3fb0ace5f8 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -175,6 +175,34 @@ struct AcpiFadtDescriptorRev5_1 { typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1; +typedef struct AcpiFadtData { + struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ + struct AcpiGenericAddress pm1a_evt; /* PM1a_EVT_BLK */ + struct AcpiGenericAddress pm_tmr; /* PM_TMR_BLK */ + struct AcpiGenericAddress gpe0_blk; /* GPE0_BLK */ + struct AcpiGenericAddress reset_reg; /* RESET_REG */ + uint8_t reset_val; /* RESET_VALUE */ + uint8_t rev; /* Revision */ + uint32_t flags; /* Flags */ + uint32_t smi_cmd; /* SMI_CMD */ + uint16_t sci_int; /* SCI_INT */ + uint8_t int_model; /* INT_MODEL */ + uint8_t acpi_enable_cmd; /* ACPI_ENABLE */ + uint8_t acpi_disable_cmd; /* ACPI_DISABLE */ + uint8_t rtc_century; /* CENTURY */ + uint16_t plvl2_lat; /* P_LVL2_LAT */ + uint16_t plvl3_lat; /* P_LVL3_LAT */ + + /* + * respective tables offsets within ACPI_BUILD_TABLE_FILE, + * NULL if table doesn't exist (in that case field's value + * won't be patched by linker and will be kept set to 0) + */ + unsigned *facs_tbl_offset; /* FACS offset in */ + unsigned *dsdt_tbl_offset; + unsigned *xdsdt_tbl_offset; +} AcpiFadtData; + #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) -- cgit v1.2.3-55-g7522 From 5d7a334f7c40dcb6aab3e4496ac826e813ccdb07 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:52 +0100 Subject: pc: acpi: use build_append_foo() API to construct FADT build_append_foo() API doesn't need explicit endianness conversions which eliminates a source of errors and it makes build_fadt() look like declarative definition of FADT table in ACPI spec, which makes it easy to review. Also it allows easily extending FADT to support other revisions which will be used by follow up patches where build_fadt() will be reused for ARM target. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 146 +++++++++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 62 deletions(-) (limited to 'hw') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1f88ed1a11..d1b387e77b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -298,84 +298,106 @@ build_facs(GArray *table_data, BIOSLinker *linker) facs->length = cpu_to_le32(sizeof(*facs)); } -/* Load chipset information in FADT */ -static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiFadtData f) -{ - fadt->model = f.int_model; - fadt->reserved1 = 0; - fadt->sci_int = cpu_to_le16(f.sci_int); - fadt->smi_cmd = cpu_to_le32(f.smi_cmd); - fadt->acpi_enable = f.acpi_enable_cmd; - fadt->acpi_disable = f.acpi_disable_cmd; - /* EVT, CNT, TMR offset matches hw/acpi/core.c */ - fadt->pm1a_evt_blk = cpu_to_le32(f.pm1a_evt.address); - fadt->pm1a_cnt_blk = cpu_to_le32(f.pm1a_cnt.address); - fadt->pm_tmr_blk = cpu_to_le32(f.pm_tmr.address); - fadt->gpe0_blk = cpu_to_le32(f.gpe0_blk.address); - /* EVT, CNT, TMR length matches hw/acpi/core.c */ - fadt->pm1_evt_len = f.pm1a_evt.bit_width / 8; - fadt->pm1_cnt_len = f.pm1a_cnt.bit_width / 8; - fadt->pm_tmr_len = f.pm_tmr.bit_width / 8; - fadt->gpe0_blk_len = f.gpe0_blk.bit_width / 8; - fadt->plvl2_lat = cpu_to_le16(f.plvl2_lat); - fadt->plvl3_lat = cpu_to_le16(f.plvl3_lat); - fadt->flags = cpu_to_le32(f.flags); - fadt->century = f.rtc_century; - if (f.rev == 1) { - return; - } - - fadt->reset_value = f.reset_val; - fadt->reset_register = f.reset_reg; - fadt->reset_register.address = cpu_to_le64(f.reset_reg.address); - - fadt->xpm1a_event_block = f.pm1a_evt; - fadt->xpm1a_event_block.address = cpu_to_le64(f.pm1a_evt.address); - - fadt->xpm1a_control_block = f.pm1a_cnt; - fadt->xpm1a_control_block.address = cpu_to_le64(f.pm1a_cnt.address); - - fadt->xpm_timer_block = f.pm_tmr; - fadt->xpm_timer_block.address = cpu_to_le64(f.pm_tmr.address); - - fadt->xgpe0_block = f.gpe0_blk; - fadt->xgpe0_block.address = cpu_to_le64(f.gpe0_blk.address); -} - - /* FADT */ static void -build_fadt(GArray *table_data, BIOSLinker *linker, AcpiFadtData *f, +build_fadt(GArray *tbl, BIOSLinker *linker, AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { - AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt)); - unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data; - unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; - int fadt_size = sizeof(*fadt); + int off; + int fadt_start = tbl->len; - /* FACS address to be filled by Guest linker */ + acpi_data_push(tbl, sizeof(AcpiTableHeader)); + + /* FACS address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl), + ACPI_BUILD_TABLE_FILE, off, 4, ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); - /* DSDT address to be filled by Guest linker */ - fadt_setup(fadt, *f); + /* DSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* DSDT */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), + ACPI_BUILD_TABLE_FILE, off, 4, ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); + /* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */ + build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1); + /* Preferred_PM_Profile */ + build_append_int_noprefix(tbl, 0 /* Unspecified */, 1); + build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */ + build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */ + build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */ + build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */ + build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */ + /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */ + build_append_int_noprefix(tbl, 0, 1); + build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */ + build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */ + build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */ + build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */ + /* PM1_EVT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1); + /* PM1_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */ + /* GPE0_BLK_LEN */ + build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */ + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */ + build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */ + build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */ + build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */ + build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */ + build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */ + build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */ + build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */ + build_append_int_noprefix(tbl, 0, 1); /* Reserved */ + build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ + if (f->rev == 1) { - fadt_size = offsetof(typeof(*fadt), reset_register); - } else if (f->xdsdt_tbl_offset) { + goto build_hdr; + } + + build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ + build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ + build_append_int_noprefix(tbl, 0, 3); /* Reserved, ACPI 3.0 */ + build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ + + /* XDSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */ + if (f->xdsdt_tbl_offset) { bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), + ACPI_BUILD_TABLE_FILE, off, 8, ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); } - build_header(linker, table_data, - (void *)fadt, "FACP", fadt_size, f->rev, - oem_id, oem_table_id); + build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */ + /* X_PM1b_EVT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */ + /* X_PM1b_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + /* X_PM2_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */ + build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ + +build_hdr: + build_header(linker, tbl, (void *)(tbl->data + fadt_start), + "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); } void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, -- cgit v1.2.3-55-g7522 From 8612f8bd9fb667c62e9a56c67ab2e3af2798dd9c Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:53 +0100 Subject: acpi: move build_fadt() from i386 specific to generic ACPI source It will be extended and reused by follow up patch for ARM target. PS: Since it's generic function now, don't patch FIRMWARE_CTRL, DSDT fields if they don't point to tables since platform might not provide them and use X_ variants instead if applicable. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 105 ++++++++++++++++++++++++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 6 +-- hw/i386/acpi-build.c | 102 ------------------------------------------ include/hw/acpi/aml-build.h | 3 ++ 4 files changed, 111 insertions(+), 105 deletions(-) (limited to 'hw') diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 3fef5f6abf..8f45298a3a 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1678,3 +1678,108 @@ void build_slit(GArray *table_data, BIOSLinker *linker) "SLIT", table_data->len - slit_start, 1, NULL, NULL); } + +/* build rev1/rev3 FADT */ +void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id) +{ + int off; + int fadt_start = tbl->len; + + acpi_data_push(tbl, sizeof(AcpiTableHeader)); + + /* FACS address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */ + if (f->facs_tbl_offset) { /* don't patch if not supported by platform */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 4, + ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); + } + + /* DSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* DSDT */ + if (f->dsdt_tbl_offset) { /* don't patch if not supported by platform */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 4, + ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); + } + + /* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */ + build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1); + /* Preferred_PM_Profile */ + build_append_int_noprefix(tbl, 0 /* Unspecified */, 1); + build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */ + build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */ + build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */ + build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */ + build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */ + /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */ + build_append_int_noprefix(tbl, 0, 1); + build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */ + build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */ + build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */ + build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */ + /* PM1_EVT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1); + /* PM1_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */ + /* GPE0_BLK_LEN */ + build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */ + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */ + build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */ + build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */ + build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */ + build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */ + build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */ + build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */ + build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */ + build_append_int_noprefix(tbl, 0, 1); /* Reserved */ + build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ + + if (f->rev == 1) { + goto build_hdr; + } + + build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ + build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ + build_append_int_noprefix(tbl, 0, 3); /* Reserved, ACPI 3.0 */ + build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ + + /* XDSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */ + if (f->xdsdt_tbl_offset) { + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 8, + ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); + } + + build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */ + /* X_PM1b_EVT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */ + /* X_PM1b_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + /* X_PM2_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */ + build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ + +build_hdr: + build_header(linker, tbl, (void *)(tbl->data + fadt_start), + "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); +} diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index f7fa795278..b644da901b 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -651,8 +651,8 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } /* FADT */ -static void build_fadt(GArray *table_data, BIOSLinker *linker, - VirtMachineState *vms, unsigned dsdt_tbl_offset) +static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker, + VirtMachineState *vms, unsigned dsdt_tbl_offset) { int fadt_start = table_data->len; AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); @@ -761,7 +761,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */ acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, vms, dsdt); + build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d1b387e77b..ebde2cda9d 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -298,108 +298,6 @@ build_facs(GArray *table_data, BIOSLinker *linker) facs->length = cpu_to_le32(sizeof(*facs)); } -/* FADT */ -static void -build_fadt(GArray *tbl, BIOSLinker *linker, AcpiFadtData *f, - const char *oem_id, const char *oem_table_id) -{ - int off; - int fadt_start = tbl->len; - - acpi_data_push(tbl, sizeof(AcpiTableHeader)); - - /* FACS address to be filled by Guest linker at runtime */ - off = tbl->len; - build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, off, 4, - ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); - - /* DSDT address to be filled by Guest linker at runtime */ - off = tbl->len; - build_append_int_noprefix(tbl, 0, 4); /* DSDT */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, off, 4, - ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); - - /* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */ - build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1); - /* Preferred_PM_Profile */ - build_append_int_noprefix(tbl, 0 /* Unspecified */, 1); - build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */ - build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */ - build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */ - build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */ - build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */ - /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */ - build_append_int_noprefix(tbl, 0, 1); - build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */ - build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */ - build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */ - build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */ - build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */ - build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */ - build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */ - build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */ - /* PM1_EVT_LEN */ - build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1); - /* PM1_CNT_LEN */ - build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1); - build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */ - build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */ - /* GPE0_BLK_LEN */ - build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1); - build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */ - build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */ - build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */ - build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */ - build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */ - build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */ - build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */ - build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */ - build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */ - build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */ - build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */ - build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */ - build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */ - build_append_int_noprefix(tbl, 0, 1); /* Reserved */ - build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ - - if (f->rev == 1) { - goto build_hdr; - } - - build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ - build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ - build_append_int_noprefix(tbl, 0, 3); /* Reserved, ACPI 3.0 */ - build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ - - /* XDSDT address to be filled by Guest linker at runtime */ - off = tbl->len; - build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */ - if (f->xdsdt_tbl_offset) { - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, off, 8, - ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); - } - - build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */ - /* X_PM1b_EVT_BLK */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); - build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */ - /* X_PM1b_CNT_BLK */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); - /* X_PM2_CNT_BLK */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); - build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */ - build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ - -build_hdr: - build_header(linker, tbl, (void *)(tbl->data + fadt_start), - "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); -} - void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry) { diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 8692ccc047..6c36903c0a 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -413,4 +413,7 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); void build_slit(GArray *table_data, BIOSLinker *linker); + +void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id); #endif -- cgit v1.2.3-55-g7522 From dd1b2037a38024b69e952c5ac41fd63b96e552ab Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 28 Feb 2018 15:23:54 +0100 Subject: virt_arm: acpi: reuse common build_fadt() Extend generic build_fadt() to support rev5.1 FADT and reuse it for 'virt' board, it would allow to phase out usage of AcpiFadtDescriptorRev5_1 and later ACPI_FADT_COMMON_DEF. Signed-off-by: Igor Mammedov Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 23 +++++++++++++++++++++-- hw/arm/virt-acpi-build.c | 33 ++++++++++++--------------------- include/hw/acpi/acpi-defs.h | 12 ++---------- 3 files changed, 35 insertions(+), 33 deletions(-) (limited to 'hw') diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 8f45298a3a..3fa557cea1 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1679,7 +1679,7 @@ void build_slit(GArray *table_data, BIOSLinker *linker) table_data->len - slit_start, 1, NULL, NULL); } -/* build rev1/rev3 FADT */ +/* build rev1/rev3/rev5.1 FADT */ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { @@ -1755,7 +1755,14 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ - build_append_int_noprefix(tbl, 0, 3); /* Reserved, ACPI 3.0 */ + /* Since ACPI 5.1 */ + if ((f->rev >= 6) || ((f->rev == 5) && f->minor_ver > 0)) { + build_append_int_noprefix(tbl, f->arm_boot_arch, 2); /* ARM_BOOT_ARCH */ + /* FADT Minor Version */ + build_append_int_noprefix(tbl, f->minor_ver, 1); + } else { + build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */ + } build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ /* XDSDT address to be filled by Guest linker at runtime */ @@ -1779,6 +1786,18 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ + if (f->rev <= 4) { + goto build_hdr; + } + + /* SLEEP_CONTROL_REG */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + /* SLEEP_STATUS_REG */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + + /* TODO: extra fields need to be added to support revisions above rev5 */ + assert(f->rev == 5); + build_hdr: build_header(linker, tbl, (void *)(tbl->data + fadt_start), "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index b644da901b..c7c6a57ec5 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -654,39 +654,30 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms, unsigned dsdt_tbl_offset) { - int fadt_start = table_data->len; - AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; - uint16_t bootflags; + /* ACPI v5.1 */ + AcpiFadtData fadt = { + .rev = 5, + .minor_ver = 1, + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, + .xdsdt_tbl_offset = &dsdt_tbl_offset, + }; switch (vms->psci_conduit) { case QEMU_PSCI_CONDUIT_DISABLED: - bootflags = 0; + fadt.arm_boot_arch = 0; break; case QEMU_PSCI_CONDUIT_HVC: - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT | ACPI_FADT_ARM_PSCI_USE_HVC; + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT | + ACPI_FADT_ARM_PSCI_USE_HVC; break; case QEMU_PSCI_CONDUIT_SMC: - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT; + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT; break; default: g_assert_not_reached(); } - /* Hardware Reduced = 1 and use PSCI 0.2+ */ - fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI); - fadt->arm_boot_flags = cpu_to_le16(bootflags); - - /* ACPI v5.1 (fadt->revision.fadt->minor_revision) */ - fadt->minor_revision = 0x1; - - /* DSDT address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); - - build_header(linker, table_data, (void *)(table_data->data + fadt_start), - "FACP", table_data->len - fadt_start, 5, NULL, NULL); + build_fadt(table_data, linker, &fadt, NULL, NULL); } /* DSDT */ diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 3fb0ace5f8..fe15e954fa 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -165,16 +165,6 @@ struct AcpiFadtDescriptorRev3 { } QEMU_PACKED; typedef struct AcpiFadtDescriptorRev3 AcpiFadtDescriptorRev3; -struct AcpiFadtDescriptorRev5_1 { - ACPI_FADT_COMMON_DEF - /* 64-bit Sleep Control register (ACPI 5.0) */ - struct AcpiGenericAddress sleep_control; - /* 64-bit Sleep Status register (ACPI 5.0) */ - struct AcpiGenericAddress sleep_status; -} QEMU_PACKED; - -typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1; - typedef struct AcpiFadtData { struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ struct AcpiGenericAddress pm1a_evt; /* PM1a_EVT_BLK */ @@ -192,6 +182,8 @@ typedef struct AcpiFadtData { uint8_t rtc_century; /* CENTURY */ uint16_t plvl2_lat; /* P_LVL2_LAT */ uint16_t plvl3_lat; /* P_LVL3_LAT */ + uint16_t arm_boot_arch; /* ARM_BOOT_ARCH */ + uint8_t minor_ver; /* FADT Minor Version */ /* * respective tables offsets within ACPI_BUILD_TABLE_FILE, -- cgit v1.2.3-55-g7522 From 1d0cad532ca1a5c65196e3fcd3904ef29b253b1c Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Sun, 17 Dec 2017 17:49:12 -0300 Subject: hw/pci: remove obsolete PCIDevice->init() All PCI devices are now QOM'ified. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 14 -------------- include/hw/pci/pci.h | 1 - 2 files changed, 15 deletions(-) (limited to 'hw') diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 2174c254eb..f98efdc6e1 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2049,18 +2049,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } -static void pci_default_realize(PCIDevice *dev, Error **errp) -{ - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); - - if (pc->init) { - if (pc->init(dev) < 0) { - error_setg(errp, "Device initialization failed"); - return; - } - } -} - PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction, const char *name) { @@ -2533,13 +2521,11 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev) static void pci_device_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); - PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); k->realize = pci_qdev_realize; k->unrealize = pci_qdev_unrealize; k->bus_type = TYPE_PCI_BUS; k->props = pci_props; - pc->realize = pci_default_realize; } static void pci_device_class_base_init(ObjectClass *klass, void *data) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index d8c18c7fa4..e28f3fa33e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -217,7 +217,6 @@ typedef struct PCIDeviceClass { DeviceClass parent_class; void (*realize)(PCIDevice *dev, Error **errp); - int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */ PCIUnregisterFunc *exit; PCIConfigReadFunc *config_read; PCIConfigWriteFunc *config_write; -- cgit v1.2.3-55-g7522 From 52c95cae4e1299e0a2a9709093581a3a96473aa5 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Sun, 11 Mar 2018 11:02:11 +0800 Subject: pc-dimm: make qmp_pc_dimm_device_list() sort devices by address Make qmp_pc_dimm_device_list() return sorted by start address list of devices so that it could be reused in places that would need sorted list*. Reuse existing pc_dimm_built_list() to get sorted list. While at it hide recursive callbacks from callers, so that: qmp_pc_dimm_device_list(qdev_get_machine(), &list); could be replaced with simpler: list = qmp_pc_dimm_device_list(); * follow up patch will use it in build_srat() Signed-off-by: Haozhong Zhang Reviewed-by: Igor Mammedov Acked-by: David Gibson for ppc part Reviewed-by: Bharata B Rao Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 83 +++++++++++++++++++++++++----------------------- hw/ppc/spapr.c | 3 +- include/hw/mem/pc-dimm.h | 2 +- numa.c | 4 +-- qmp.c | 7 +--- stubs/qmp_pc_dimm.c | 4 +-- 6 files changed, 50 insertions(+), 53 deletions(-) (limited to 'hw') diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 6e74b61cb6..4d050fe2cd 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -162,45 +162,6 @@ uint64_t get_plugged_memory_size(void) return pc_existing_dimms_capacity(&error_abort); } -int qmp_pc_dimm_device_list(Object *obj, void *opaque) -{ - MemoryDeviceInfoList ***prev = opaque; - - if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { - DeviceState *dev = DEVICE(obj); - - if (dev->realized) { - MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); - MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); - PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); - DeviceClass *dc = DEVICE_GET_CLASS(obj); - PCDIMMDevice *dimm = PC_DIMM(obj); - - if (dev->id) { - di->has_id = true; - di->id = g_strdup(dev->id); - } - di->hotplugged = dev->hotplugged; - di->hotpluggable = dc->hotpluggable; - di->addr = dimm->addr; - di->slot = dimm->slot; - di->node = dimm->node; - di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, - NULL); - di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); - - info->u.dimm.data = di; - elem->value = info; - elem->next = NULL; - **prev = elem; - *prev = &elem->next; - } - } - - object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); - return 0; -} - static int pc_dimm_slot2bitmap(Object *obj, void *opaque) { unsigned long *bitmap = opaque; @@ -276,6 +237,50 @@ static int pc_dimm_built_list(Object *obj, void *opaque) return 0; } +MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) +{ + GSList *dimms = NULL, *item; + MemoryDeviceInfoList *list = NULL, *prev = NULL; + + object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &dimms); + + for (item = dimms; item; item = g_slist_next(item)) { + PCDIMMDevice *dimm = PC_DIMM(item->data); + Object *obj = OBJECT(dimm); + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + DeviceClass *dc = DEVICE_GET_CLASS(obj); + DeviceState *dev = DEVICE(obj); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + info->u.dimm.data = di; + elem->value = info; + elem->next = NULL; + if (prev) { + prev->next = elem; + } else { + list = elem; + } + prev = elem; + } + + g_slist_free(dimms); + + return list; +} + uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, uint64_t *hint, uint64_t align, uint64_t size, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 7e1c858566..44a0670d11 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -722,8 +722,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) } if (hotplug_lmb_start) { - MemoryDeviceInfoList **prev = &dimms; - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + dimms = qmp_pc_dimm_device_list(); } /* ibm,dynamic-memory */ diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index d83b957829..1fc479281c 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -93,7 +93,7 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); -int qmp_pc_dimm_device_list(Object *obj, void *opaque); +MemoryDeviceInfoList *qmp_pc_dimm_device_list(void); uint64_t pc_existing_dimms_capacity(Error **errp); uint64_t get_plugged_memory_size(void); void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, diff --git a/numa.c b/numa.c index 398e2c9a85..94427046ec 100644 --- a/numa.c +++ b/numa.c @@ -520,12 +520,10 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, static void numa_stat_memory_devices(NumaNodeMem node_mem[]) { - MemoryDeviceInfoList *info_list = NULL; - MemoryDeviceInfoList **prev = &info_list; + MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list(); MemoryDeviceInfoList *info; PCDIMMDeviceInfo *pcdimm_info; - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; diff --git a/qmp.c b/qmp.c index 8c7d1cc479..4b2517d8b5 100644 --- a/qmp.c +++ b/qmp.c @@ -731,12 +731,7 @@ void qmp_object_del(const char *id, Error **errp) MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) { - MemoryDeviceInfoList *head = NULL; - MemoryDeviceInfoList **prev = &head; - - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); - - return head; + return qmp_pc_dimm_device_list(); } ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp) diff --git a/stubs/qmp_pc_dimm.c b/stubs/qmp_pc_dimm.c index 9ddc4f619a..b6b2cca89e 100644 --- a/stubs/qmp_pc_dimm.c +++ b/stubs/qmp_pc_dimm.c @@ -2,9 +2,9 @@ #include "qom/object.h" #include "hw/mem/pc-dimm.h" -int qmp_pc_dimm_device_list(Object *obj, void *opaque) +MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) { - return 0; + return NULL; } uint64_t get_plugged_memory_size(void) -- cgit v1.2.3-55-g7522 From 6388e18de9c6842de9a1307b61d42c8e4549009c Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Sun, 11 Mar 2018 11:02:12 +0800 Subject: qmp: distinguish PC-DIMM and NVDIMM in MemoryDeviceInfoList It may need to treat PC-DIMM and NVDIMM differently, e.g., when deciding the necessity of non-volatile flag bit in SRAT memory affinity structures. A new field 'nvdimm' is added to the union type MemoryDeviceInfo for such purpose. Its type is currently PCDIMMDeviceInfo and will be updated when necessary in the future. It also fixes "info memory-devices"/query-memory-devices which currently show nvdimm devices as dimm devices since object_dynamic_cast(obj, TYPE_PC_DIMM) happily cast nvdimm to TYPE_PC_DIMM which it's been inherited from. Signed-off-by: Haozhong Zhang Reviewed-by: Eric Blake Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hmp.c | 14 +++++++++++--- hw/mem/pc-dimm.c | 10 +++++++++- numa.c | 19 +++++++++++++------ qapi/misc.json | 6 +++++- 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'hw') diff --git a/hmp.c b/hmp.c index ba9e299ee2..a277517625 100644 --- a/hmp.c +++ b/hmp.c @@ -2423,7 +2423,18 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: di = value->u.dimm.data; + break; + + case MEMORY_DEVICE_INFO_KIND_NVDIMM: + di = value->u.nvdimm.data; + break; + + default: + di = NULL; + break; + } + if (di) { monitor_printf(mon, "Memory device [%s]: \"%s\"\n", MemoryDeviceInfoKind_str(value->type), di->id ? di->id : ""); @@ -2436,9 +2447,6 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) di->hotplugged ? "true" : "false"); monitor_printf(mon, " hotpluggable: %s\n", di->hotpluggable ? "true" : "false"); - break; - default: - break; } } } diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 4d050fe2cd..51350d9c2d 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "hw/mem/pc-dimm.h" +#include "hw/mem/nvdimm.h" #include "qapi/error.h" #include "qemu/config-file.h" #include "qapi/visitor.h" @@ -250,6 +251,7 @@ MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + bool is_nvdimm = object_dynamic_cast(obj, TYPE_NVDIMM); DeviceClass *dc = DEVICE_GET_CLASS(obj); DeviceState *dev = DEVICE(obj); @@ -265,7 +267,13 @@ MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL); di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); - info->u.dimm.data = di; + if (!is_nvdimm) { + info->u.dimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_DIMM; + } else { + info->u.nvdimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM; + } elem->value = info; elem->next = NULL; if (prev) { diff --git a/numa.c b/numa.c index 94427046ec..1116c90af9 100644 --- a/numa.c +++ b/numa.c @@ -529,18 +529,25 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) if (value) { switch (value->type) { - case MEMORY_DEVICE_INFO_KIND_DIMM: { + case MEMORY_DEVICE_INFO_KIND_DIMM: pcdimm_info = value->u.dimm.data; + break; + + case MEMORY_DEVICE_INFO_KIND_NVDIMM: + pcdimm_info = value->u.nvdimm.data; + break; + + default: + pcdimm_info = NULL; + break; + } + + if (pcdimm_info) { node_mem[pcdimm_info->node].node_mem += pcdimm_info->size; if (pcdimm_info->hotpluggable && pcdimm_info->hotplugged) { node_mem[pcdimm_info->node].node_plugged_mem += pcdimm_info->size; } - break; - } - - default: - break; } } } diff --git a/qapi/misc.json b/qapi/misc.json index bcd5d10778..6bf082f612 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -2852,7 +2852,11 @@ # # Since: 2.1 ## -{ 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } +{ 'union': 'MemoryDeviceInfo', + 'data': { 'dimm': 'PCDIMMDeviceInfo', + 'nvdimm': 'PCDIMMDeviceInfo' + } +} ## # @query-memory-devices: -- cgit v1.2.3-55-g7522 From 848a1cc1e8b04301d40aef2a0e21a783b8dcc1c5 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Sun, 11 Mar 2018 11:02:13 +0800 Subject: hw/acpi-build: build SRAT memory affinity structures for DIMM devices ACPI 6.2A Table 5-129 "SPA Range Structure" requires the proximity domain of a NVDIMM SPA range must match with corresponding entry in SRAT table. The address ranges of vNVDIMM in QEMU are allocated from the hot-pluggable address space, which is entirely covered by one SRAT memory affinity structure. However, users can set the vNVDIMM proximity domain in NFIT SPA range structure by the 'node' property of '-device nvdimm' to a value different than the one in the above SRAT memory affinity structure. In order to solve such proximity domain mismatch, this patch builds one SRAT memory affinity structure for each DIMM device present at boot time, including both PC-DIMM and NVDIMM, with the proximity domain specified in '-device pc-dimm' or '-device nvdimm'. The remaining hot-pluggable address space is covered by one or multiple SRAT memory affinity structures with the proximity domain of the last node as before. Signed-off-by: Haozhong Zhang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ebde2cda9d..1df9ed275e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2250,6 +2250,55 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) #define HOLE_640K_START (640 * 1024) #define HOLE_640K_END (1024 * 1024) +static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base, + uint64_t len, int default_node) +{ + MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list(); + MemoryDeviceInfoList *info; + MemoryDeviceInfo *mi; + PCDIMMDeviceInfo *di; + uint64_t end = base + len, cur, size; + bool is_nvdimm; + AcpiSratMemoryAffinity *numamem; + MemoryAffinityFlags flags; + + for (cur = base, info = info_list; + cur < end; + cur += size, info = info->next) { + numamem = acpi_data_push(table_data, sizeof *numamem); + + if (!info) { + build_srat_memory(numamem, cur, end - cur, default_node, + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + break; + } + + mi = info->value; + is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM); + di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data; + + if (cur < di->addr) { + build_srat_memory(numamem, cur, di->addr - cur, default_node, + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + numamem = acpi_data_push(table_data, sizeof *numamem); + } + + size = di->size; + + flags = MEM_AFFINITY_ENABLED; + if (di->hotpluggable) { + flags |= MEM_AFFINITY_HOTPLUGGABLE; + } + if (is_nvdimm) { + flags |= MEM_AFFINITY_NON_VOLATILE; + } + + build_srat_memory(numamem, di->addr, size, di->node, flags); + } + + qapi_free_MemoryDeviceInfoList(info_list); +} + static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { @@ -2361,10 +2410,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * providing _PXM method if necessary. */ if (hotplugabble_address_space_size) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, pcms->numa_nodes - 1, - MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base, + hotplugabble_address_space_size, + pcms->numa_nodes - 1); } build_header(linker, table_data, -- cgit v1.2.3-55-g7522 From 9ccbfe14ddfce379ee24684b3648376b130293cd Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:00 +0000 Subject: postcopy: Add vhost-user flag for postcopy and check it Add a vhost feature flag for postcopy support, and use the postcopy notifier to check it before allowing postcopy. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.h | 2 ++ docs/interop/vhost-user.txt | 10 +++++++++ hw/virtio/vhost-user.c | 41 ++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 18f95f65d7..96db29c4ce 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -48,6 +48,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_NET_MTU = 4, VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_MAX }; diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index cb3a7595aa..91a572d781 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -290,6 +290,15 @@ Once the source has finished migration, rings will be stopped by the source. No further update must be done before rings are restarted. +In postcopy migration the slave is started before all the memory has been +received from the source host, and care must be taken to avoid accessing pages +that have yet to be received. The slave opens a 'userfault'-fd and registers +the memory with it; this fd is then passed back over to the master. +The master services requests on the userfaultfd for pages that are accessed +and when the page is available it performs WAKE ioctl's on the userfaultfd +to wake the stalled slave. The client indicates support for this via the +VHOST_USER_PROTOCOL_F_PAGEFAULT feature. + Memory access ------------- @@ -369,6 +378,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 Master message types -------------------- diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 41ff5cff41..aab35c4845 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -18,6 +18,8 @@ #include "qemu/error-report.h" #include "qemu/sockets.h" #include "sysemu/cryptodev.h" +#include "migration/migration.h" +#include "migration/postcopy-ram.h" #include #include @@ -41,7 +43,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_MAX }; @@ -164,8 +166,10 @@ static VhostUserMsg m __attribute__ ((unused)); #define VHOST_USER_VERSION (0x1) struct vhost_user { + struct vhost_dev *dev; CharBackend *chr; int slave_fd; + NotifierWithReturn postcopy_notifier; }; static bool ioeventfd_enabled(void) @@ -791,6 +795,33 @@ out: return ret; } +static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, + void *opaque) +{ + struct PostcopyNotifyData *pnd = opaque; + struct vhost_user *u = container_of(notifier, struct vhost_user, + postcopy_notifier); + struct vhost_dev *dev = u->dev; + + switch (pnd->reason) { + case POSTCOPY_NOTIFY_PROBE: + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_PAGEFAULT)) { + /* TODO: Get the device name into this error somehow */ + error_setg(pnd->errp, + "vhost-user backend not capable of postcopy"); + return -ENOENT; + } + break; + + default: + /* We ignore notifications we don't know */ + break; + } + + return 0; +} + static int vhost_user_init(struct vhost_dev *dev, void *opaque) { uint64_t features, protocol_features; @@ -802,6 +833,7 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) u = g_new0(struct vhost_user, 1); u->chr = opaque; u->slave_fd = -1; + u->dev = dev; dev->opaque = u; err = vhost_user_get_features(dev, &features); @@ -858,6 +890,9 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) return err; } + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + postcopy_add_notifier(&u->postcopy_notifier); + return 0; } @@ -868,6 +903,10 @@ static int vhost_user_cleanup(struct vhost_dev *dev) assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = dev->opaque; + if (u->postcopy_notifier.notify) { + postcopy_remove_notifier(&u->postcopy_notifier); + u->postcopy_notifier.notify = NULL; + } if (u->slave_fd >= 0) { qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); close(u->slave_fd); -- cgit v1.2.3-55-g7522 From d3dff7a5a1e0a6eff963fabc4d06879d060f34ee Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:01 +0000 Subject: vhost-user: Add 'VHOST_USER_POSTCOPY_ADVISE' message Wire up a notifier to send a VHOST_USER_POSTCOPY_ADVISE message on an incoming advise. Later patches will fill in the behaviour/contents of the message. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 11 ++++++++ contrib/libvhost-user/libvhost-user.h | 3 +++ docs/interop/vhost-user.txt | 10 ++++++++ hw/virtio/vhost-user.c | 48 +++++++++++++++++++++++++++++++++++ migration/postcopy-ram.h | 1 + migration/savevm.c | 6 +++++ 6 files changed, 79 insertions(+) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 2e358b5bce..37d4228193 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -86,6 +86,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_VRING_ENDIAN), REQ(VHOST_USER_GET_CONFIG), REQ(VHOST_USER_SET_CONFIG), + REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_MAX), }; #undef REQ @@ -856,6 +857,14 @@ vu_set_config(VuDev *dev, VhostUserMsg *vmsg) return false; } +static bool +vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg) +{ + /* TODO: Open ufd, pass it back in the request */ + vmsg->size = 0; + return true; /* = send a reply */ +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -927,6 +936,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_config(dev, vmsg); case VHOST_USER_NONE: break; + case VHOST_USER_POSTCOPY_ADVISE: + return vu_set_postcopy_advise(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 96db29c4ce..00d78a8810 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -83,6 +83,9 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CREATE_CRYPTO_SESSION = 26, + VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_MAX } VhostUserRequest; diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 91a572d781..7854e50008 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -699,6 +699,16 @@ Master message types feature has been successfully negotiated. It's a required feature for crypto devices. + * VHOST_USER_POSTCOPY_ADVISE + Id: 28 + Master payload: N/A + Slave payload: userfault fd + + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, the + master advises slave that a migration with postcopy enabled is underway, + the slave must open a userfaultfd for later use. + Note that at this stage the migration is still in precopy mode. + Slave message types ------------------- diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index aab35c4845..ceb17b0554 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -78,6 +78,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_CONFIG = 25, VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_MAX } VhostUserRequest; @@ -795,6 +796,50 @@ out: return ret; } +/* + * Called at the start of an inbound postcopy on reception of the + * 'advise' command. + */ +static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->chr; + int ufd; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_ADVISE, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_advise to vhost"); + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + error_setg(errp, "Failed to get postcopy_advise reply from vhost"); + return -1; + } + + if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { + error_setg(errp, "Unexpected msg type. Expected %d received %d", + VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); + return -1; + } + + if (msg.hdr.size) { + error_setg(errp, "Received bad msg size."); + return -1; + } + ufd = qemu_chr_fe_get_msgfd(chr); + if (ufd < 0) { + error_setg(errp, "%s: Failed to get ufd", __func__); + return -1; + } + + /* TODO: register ufd with userfault thread */ + return 0; +} + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, void *opaque) { @@ -814,6 +859,9 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, } break; + case POSTCOPY_NOTIFY_INBOUND_ADVISE: + return vhost_user_postcopy_advise(dev, pnd->errp); + default: /* We ignore notifications we don't know */ break; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 2e879bbacb..0421c98d57 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -130,6 +130,7 @@ void postcopy_infrastructure_init(void); */ enum PostcopyNotifyReason { POSTCOPY_NOTIFY_PROBE = 0, + POSTCOPY_NOTIFY_INBOUND_ADVISE, }; struct PostcopyNotifyData { diff --git a/migration/savevm.c b/migration/savevm.c index 358c5b51e2..1f2bf12a28 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1386,6 +1386,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; + Error *local_err = NULL; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1451,6 +1452,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, return -1; } + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) { + error_report_err(local_err); + return -1; + } + if (ram_postcopy_incoming_init(mis)) { return -1; } -- cgit v1.2.3-55-g7522 From f82c11165ffaa1a8f3f0488fb4075a818a4efdaf Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:05 +0000 Subject: vhost+postcopy: Register shared ufd with postcopy Register the UFD that comes in as the response to the 'advise' method with the postcopy code. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ceb17b0554..5900583437 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -171,6 +171,7 @@ struct vhost_user { CharBackend *chr; int slave_fd; NotifierWithReturn postcopy_notifier; + struct PostCopyFD postcopy_fd; }; static bool ioeventfd_enabled(void) @@ -796,6 +797,17 @@ out: return ret; } +/* + * Called back from the postcopy fault thread when a fault is received on our + * ufd. + * TODO: This is Linux specific + */ +static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, + void *ufd) +{ + return 0; +} + /* * Called at the start of an inbound postcopy on reception of the * 'advise' command. @@ -835,8 +847,14 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) error_setg(errp, "%s: Failed to get ufd", __func__); return -1; } + fcntl(ufd, F_SETFL, O_NONBLOCK); - /* TODO: register ufd with userfault thread */ + /* register ufd with userfault thread */ + u->postcopy_fd.fd = ufd; + u->postcopy_fd.data = dev; + u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; + u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ + postcopy_register_shared_ufd(&u->postcopy_fd); return 0; } -- cgit v1.2.3-55-g7522 From 6864a7b5aced6d8d9b287b92db8d7a996ea2e8a3 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:06 +0000 Subject: vhost+postcopy: Transmit 'listen' to slave Notify the vhost-user slave on reception of the 'postcopy-listen' event from the source. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 19 +++++++++++++++++++ contrib/libvhost-user/libvhost-user.h | 2 ++ docs/interop/vhost-user.txt | 11 +++++++++++ hw/virtio/trace-events | 3 +++ hw/virtio/vhost-user.c | 34 ++++++++++++++++++++++++++++++++++ migration/postcopy-ram.h | 1 + migration/savevm.c | 7 +++++++ 7 files changed, 77 insertions(+) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 9e31f47b7a..e53b1953df 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -98,6 +98,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_GET_CONFIG), REQ(VHOST_USER_SET_CONFIG), REQ(VHOST_USER_POSTCOPY_ADVISE), + REQ(VHOST_USER_POSTCOPY_LISTEN), REQ(VHOST_USER_MAX), }; #undef REQ @@ -931,6 +932,22 @@ out: return true; /* = send a reply */ } +static bool +vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) +{ + vmsg->payload.u64 = -1; + vmsg->size = sizeof(vmsg->payload.u64); + + if (dev->nregions) { + vu_panic(dev, "Regions already registered at postcopy-listen"); + return true; + } + dev->postcopy_listening = true; + + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; + vmsg->payload.u64 = 0; /* Success */ + return true; +} static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1004,6 +1021,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) break; case VHOST_USER_POSTCOPY_ADVISE: return vu_set_postcopy_advise(dev, vmsg); + case VHOST_USER_POSTCOPY_LISTEN: + return vu_set_postcopy_listen(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 074b7860f6..ed505cf0c1 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -86,6 +86,7 @@ typedef enum VhostUserRequest { VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_MAX } VhostUserRequest; @@ -285,6 +286,7 @@ struct VuDev { /* Postcopy data */ int postcopy_ufd; + bool postcopy_listening; }; typedef struct VuVirtqElement { diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 7854e50008..0d24203d31 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -709,6 +709,17 @@ Master message types the slave must open a userfaultfd for later use. Note that at this stage the migration is still in precopy mode. + * VHOST_USER_POSTCOPY_LISTEN + Id: 29 + Master payload: N/A + + Master advises slave that a transition to postcopy mode has happened. + The slave must ensure that shared memory is registered with userfaultfd + to cause faulting of non-present pages. + + This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and + thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported. + Slave message types ------------------- diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 742ff0f90b..06ec03d6e7 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -6,6 +6,9 @@ vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 vhost_section(const char *name, int r) "%s:%d" +# hw/virtio/vhost-user.c +vhost_user_postcopy_listen(void) "" + # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 5900583437..c3ab2994fb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -20,6 +20,7 @@ #include "sysemu/cryptodev.h" #include "migration/migration.h" #include "migration/postcopy-ram.h" +#include "trace.h" #include #include @@ -79,6 +80,7 @@ typedef enum VhostUserRequest { VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_MAX } VhostUserRequest; @@ -172,6 +174,8 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; + /* True once we've entered postcopy_listen */ + bool postcopy_listen; }; static bool ioeventfd_enabled(void) @@ -858,6 +862,33 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) return 0; } +/* + * Called at the switch to postcopy on reception of the 'listen' command. + */ +static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_LISTEN, + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + u->postcopy_listen = true; + trace_vhost_user_postcopy_listen(); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_listen to vhost"); + return -1; + } + + ret = process_message_reply(dev, &msg); + if (ret) { + error_setg(errp, "Failed to receive reply to postcopy_listen"); + return ret; + } + + return 0; +} + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, void *opaque) { @@ -880,6 +911,9 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, case POSTCOPY_NOTIFY_INBOUND_ADVISE: return vhost_user_postcopy_advise(dev, pnd->errp); + case POSTCOPY_NOTIFY_INBOUND_LISTEN: + return vhost_user_postcopy_listen(dev, pnd->errp); + default: /* We ignore notifications we don't know */ break; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index f21eef6702..c8ced3470b 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -131,6 +131,7 @@ void postcopy_infrastructure_init(void); enum PostcopyNotifyReason { POSTCOPY_NOTIFY_PROBE = 0, POSTCOPY_NOTIFY_INBOUND_ADVISE, + POSTCOPY_NOTIFY_INBOUND_LISTEN, }; struct PostcopyNotifyData { diff --git a/migration/savevm.c b/migration/savevm.c index 1f2bf12a28..305c3ceaf5 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1618,6 +1618,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); trace_loadvm_postcopy_handle_listen(); + Error *local_err = NULL; + if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); return -1; @@ -1643,6 +1645,11 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) } } + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { + error_report_err(local_err); + return -1; + } + if (mis->have_listen_thread) { error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread"); return -1; -- cgit v1.2.3-55-g7522 From 55d754b307f6e10503dcf2e2136e6e0aef8c80f5 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:07 +0000 Subject: postcopy+vhost-user: Split set_mem_table for postcopy Split the set_mem_table routines in both qemu and libvhost-user because the postcopy versions are going to be quite different once changes in the later patches are added. However, this patch doesn't produce any functional change, just the split. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 53 ++++++++++++++++++++++++ hw/virtio/vhost-user.c | 77 ++++++++++++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index e53b1953df..b2de8ed0a8 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -448,6 +448,55 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) return false; } +static bool +vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) +{ + int i; + VhostUserMemory *memory = &vmsg->payload.memory; + dev->nregions = memory->nregions; + /* TODO: Postcopy specific code */ + DPRINT("Nregions: %d\n", memory->nregions); + for (i = 0; i < dev->nregions; i++) { + void *mmap_addr; + VhostUserMemoryRegion *msg_region = &memory->regions[i]; + VuDevRegion *dev_region = &dev->regions[i]; + + DPRINT("Region %d\n", i); + DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", + msg_region->guest_phys_addr); + DPRINT(" memory_size: 0x%016"PRIx64"\n", + msg_region->memory_size); + DPRINT(" userspace_addr 0x%016"PRIx64"\n", + msg_region->userspace_addr); + DPRINT(" mmap_offset 0x%016"PRIx64"\n", + msg_region->mmap_offset); + + dev_region->gpa = msg_region->guest_phys_addr; + dev_region->size = msg_region->memory_size; + dev_region->qva = msg_region->userspace_addr; + dev_region->mmap_offset = msg_region->mmap_offset; + + /* We don't use offset argument of mmap() since the + * mapped address has to be page aligned, and we use huge + * pages. */ + mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, + PROT_READ | PROT_WRITE, MAP_SHARED, + vmsg->fds[i], 0); + + if (mmap_addr == MAP_FAILED) { + vu_panic(dev, "region mmap error: %s", strerror(errno)); + } else { + dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; + DPRINT(" mmap_addr: 0x%016"PRIx64"\n", + dev_region->mmap_addr); + } + + close(vmsg->fds[i]); + } + + return false; +} + static bool vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) { @@ -464,6 +513,10 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) } dev->nregions = memory->nregions; + if (dev->postcopy_listening) { + return vu_set_mem_table_exec_postcopy(dev, vmsg); + } + DPRINT("Nregions: %d\n", memory->nregions); for (i = 0; i < dev->nregions; i++) { void *mmap_addr; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c3ab2994fb..b6757ebae3 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -340,15 +340,86 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, return 0; } +static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, + struct vhost_memory *mem) +{ + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int i, fd; + size_t fd_num = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + /* TODO: Add actual postcopy differences */ + VhostUserMsg msg = { + .hdr.request = VHOST_USER_SET_MEM_TABLE, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t offset; + MemoryRegion *mr; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, + &offset); + fd = memory_region_get_fd(mr); + if (fd > 0) { + msg.payload.memory.regions[fd_num].userspace_addr = + reg->userspace_addr; + msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; + msg.payload.memory.regions[fd_num].guest_phys_addr = + reg->guest_phys_addr; + msg.payload.memory.regions[fd_num].mmap_offset = offset; + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); + fds[fd_num++] = fd; + } + } + + msg.payload.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map, " + "consider using -object memory-backend-file share=on"); + return -1; + } + + msg.hdr.size = sizeof(msg.payload.memory.nregions); + msg.hdr.size += sizeof(msg.payload.memory.padding); + msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); + + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } + + if (reply_supported) { + return process_message_reply(dev, &msg); + } + + return 0; +} + static int vhost_user_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { + struct vhost_user *u = dev->opaque; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; + bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (do_postcopy) { + /* Postcopy has enough differences that it's best done in it's own + * version + */ + return vhost_user_set_mem_table_postcopy(dev, mem); + } + VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_MEM_TABLE, .hdr.flags = VHOST_USER_VERSION, @@ -372,9 +443,11 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, error_report("Failed preparing vhost-user memory table msg"); return -1; } - msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; + msg.payload.memory.regions[fd_num].userspace_addr = + reg->userspace_addr; msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; - msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; + msg.payload.memory.regions[fd_num].guest_phys_addr = + reg->guest_phys_addr; msg.payload.memory.regions[fd_num].mmap_offset = offset; fds[fd_num++] = fd; } -- cgit v1.2.3-55-g7522 From 9bb38019942c2f3f44b98f5830e369faec701e55 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:10 +0000 Subject: vhost+postcopy: Send address back to qemu We need a better way, but at the moment we need the address of the mappings sent back to qemu so it can interpret the messages on the userfaultfd it reads. This is done as a 3 stage set: QEMU -> client set_mem_table mmap stuff, get addresses client -> qemu here are the addresses qemu -> client OK - now you can use them That ensures that qemu has registered the new addresses in it's userfault code before the client starts accessing them. Note: We don't ask for the default 'ack' reply since we've got our own. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++- docs/interop/vhost-user.txt | 9 +++++ hw/virtio/trace-events | 1 + hw/virtio/vhost-user.c | 67 +++++++++++++++++++++++++++++++++-- 4 files changed, 98 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 7c8cd5878e..6314549b65 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -491,10 +491,32 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) dev_region->mmap_addr); } + /* Return the address to QEMU so that it can translate the ufd + * fault addresses back. + */ + msg_region->userspace_addr = (uintptr_t)(mmap_addr + + dev_region->mmap_offset); close(vmsg->fds[i]); } - /* TODO: Get address back to QEMU */ + /* Send the message back to qemu with the addresses filled in */ + vmsg->fd_num = 0; + if (!vu_message_write(dev, dev->sock, vmsg)) { + vu_panic(dev, "failed to respond to set-mem-table for postcopy"); + return false; + } + + /* Wait for QEMU to confirm that it's registered the handler for the + * faults. + */ + if (!vu_message_read(dev, dev->sock, vmsg) || + vmsg->size != sizeof(vmsg->payload.u64) || + vmsg->payload.u64 != 0) { + vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table"); + return false; + } + + /* OK, now we can go and register the memory and generate faults */ for (i = 0; i < dev->nregions; i++) { VuDevRegion *dev_region = &dev->regions[i]; #ifdef UFFDIO_REGISTER diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 0d24203d31..e295ef12ca 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -455,12 +455,21 @@ Master message types Id: 5 Equivalent ioctl: VHOST_SET_MEM_TABLE Master payload: memory regions description + Slave payload: (postcopy only) memory regions description Sets the memory map regions on the slave so it can translate the vring addresses. In the ancillary data there is an array of file descriptors for each memory mapped region. The size and ordering of the fds matches the number and ordering of memory regions. + When VHOST_USER_POSTCOPY_LISTEN has been received, SET_MEM_TABLE replies with + the bases of the memory mapped regions to the master. The slave must + have mmap'd the regions but not yet accessed them and should not yet generate + a userfault event. Note NEED_REPLY_MASK is not set in this case. + QEMU will then reply back to the list of mappings with an empty + VHOST_USER_SET_MEM_TABLE as an acknowledgment; only upon reception of this + message may the guest start accessing the memory and generating faults. + * VHOST_USER_SET_LOG_BASE Id: 6 diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 06ec03d6e7..05d18ada77 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -8,6 +8,7 @@ vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c vhost_user_postcopy_listen(void) "" +vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b6757ebae3..1603d70bea 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -174,6 +174,7 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; + uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; /* True once we've entered postcopy_listen */ bool postcopy_listen; }; @@ -343,12 +344,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, struct vhost_memory *mem) { + struct vhost_user *u = dev->opaque; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); - /* TODO: Add actual postcopy differences */ + VhostUserMsg msg_reply; + int region_i, msg_i; + VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_MEM_TABLE, .hdr.flags = VHOST_USER_VERSION, @@ -395,6 +399,64 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, return -1; } + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + /* We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); + + /* They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + /* Now we've registered this with the postcopy code, we ack to the client, + * because now we're in the position to be able to deal with any faults + * it generates. + */ + /* TODO: Use this for failure cases as well with a bad value */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + if (reply_supported) { return process_message_reply(dev, &msg); } @@ -411,7 +473,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, size_t fd_num = 0; bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); + VHOST_USER_PROTOCOL_F_REPLY_ACK) && + !do_postcopy; if (do_postcopy) { /* Postcopy has enough differences that it's best done in it's own -- cgit v1.2.3-55-g7522 From 905125d0e20395188a090c16d38e38694fc43fc4 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:11 +0000 Subject: vhost+postcopy: Stash RAMBlock and offset Stash the RAMBlock and offset for later use looking up addresses. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 1 + hw/virtio/vhost-user.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'hw') diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 05d18ada77..d7e9e1084b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -9,6 +9,7 @@ vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c vhost_user_postcopy_listen(void) "" vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" +vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 1603d70bea..b47de62c47 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -175,6 +175,15 @@ struct vhost_user { NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; + /* Length of the region_rb and region_rb_offset arrays */ + size_t region_rb_len; + /* RAMBlock associated with a given region */ + RAMBlock **region_rb; + /* The offset from the start of the RAMBlock to the start of the + * vhost region. + */ + ram_addr_t *region_rb_offset; + /* True once we've entered postcopy_listen */ bool postcopy_listen; }; @@ -362,6 +371,17 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } + if (u->region_rb_len < dev->mem->nregions) { + u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); + u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, + dev->mem->nregions); + memset(&(u->region_rb[u->region_rb_len]), '\0', + sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); + memset(&(u->region_rb_offset[u->region_rb_len]), '\0', + sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); + u->region_rb_len = dev->mem->nregions; + } + for (i = 0; i < dev->mem->nregions; ++i) { struct vhost_memory_region *reg = dev->mem->regions + i; ram_addr_t offset; @@ -372,6 +392,12 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, &offset); fd = memory_region_get_fd(mr); if (fd > 0) { + trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, + reg->memory_size, + reg->guest_phys_addr, + reg->userspace_addr, offset); + u->region_rb_offset[i] = offset; + u->region_rb[i] = mr->ram_block; msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; @@ -380,6 +406,9 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, msg.payload.memory.regions[fd_num].mmap_offset = offset; assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); fds[fd_num++] = fd; + } else { + u->region_rb_offset[i] = 0; + u->region_rb[i] = NULL; } } @@ -1148,6 +1177,11 @@ static int vhost_user_cleanup(struct vhost_dev *dev) close(u->slave_fd); u->slave_fd = -1; } + g_free(u->region_rb); + u->region_rb = NULL; + g_free(u->region_rb_offset); + u->region_rb_offset = NULL; + u->region_rb_len = 0; g_free(u); dev->opaque = 0; -- cgit v1.2.3-55-g7522 From 375318d03f3f3fca72606eea71240cb6a3536314 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:13 +0000 Subject: vhost+postcopy: Resolve client address Resolve fault addresses read off the clients UFD into RAMBlock and offset, and call back to the postcopy code to ask for the page. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 3 +++ hw/virtio/vhost-user.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index d7e9e1084b..3afd12cfea 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -7,6 +7,9 @@ vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c +vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d" +vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64 +vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64 vhost_user_postcopy_listen(void) "" vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b47de62c47..6dee1b565c 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -26,6 +26,7 @@ #include #include #include +#include #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -974,7 +975,35 @@ out: static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, void *ufd) { - return 0; + struct vhost_dev *dev = pcfd->data; + struct vhost_user *u = dev->opaque; + struct uffd_msg *msg = ufd; + uint64_t faultaddr = msg->arg.pagefault.address; + RAMBlock *rb = NULL; + uint64_t rb_offset; + int i; + + trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, + dev->mem->nregions); + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { + trace_vhost_user_postcopy_fault_handler_loop(i, + u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); + if (faultaddr >= u->postcopy_client_bases[i]) { + /* Ofset of the fault address in the vhost region */ + uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; + if (region_offset < dev->mem->regions[i].memory_size) { + rb_offset = region_offset + u->region_rb_offset[i]; + trace_vhost_user_postcopy_fault_handler_found(i, + region_offset, rb_offset); + rb = u->region_rb[i]; + return postcopy_request_shared_page(pcfd, rb, faultaddr, + rb_offset); + } + } + } + error_report("%s: Failed to find region for fault %" PRIx64, + __func__, faultaddr); + return -1; } /* -- cgit v1.2.3-55-g7522 From c07e36158fe342e3da8d10c34ba4fc82f28a5d1b Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:16 +0000 Subject: vhost+postcopy: Add vhost waker Register a waker function in vhost-user code to be notified when pages arrive or requests to previously mapped pages get requested. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 3 +++ hw/virtio/vhost-user.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'hw') diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 3afd12cfea..fe5e0ff856 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -13,6 +13,9 @@ vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t vhost_user_postcopy_listen(void) "" vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 +vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 +vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 +vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 6dee1b565c..a785aefd3e 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1006,6 +1006,35 @@ static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, return -1; } +static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, + uint64_t offset) +{ + struct vhost_dev *dev = pcfd->data; + struct vhost_user *u = dev->opaque; + int i; + + trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); + + if (!u) { + return 0; + } + /* Translate the offset into an address in the clients address space */ + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { + if (u->region_rb[i] == rb && + offset >= u->region_rb_offset[i] && + offset < (u->region_rb_offset[i] + + dev->mem->regions[i].memory_size)) { + uint64_t client_addr = (offset - u->region_rb_offset[i]) + + u->postcopy_client_bases[i]; + trace_vhost_user_postcopy_waker_found(client_addr); + return postcopy_wake_shared(pcfd, client_addr, rb); + } + } + + trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); + return 0; +} + /* * Called at the start of an inbound postcopy on reception of the * 'advise' command. @@ -1051,6 +1080,7 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) u->postcopy_fd.fd = ufd; u->postcopy_fd.data = dev; u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; + u->postcopy_fd.waker = vhost_user_postcopy_waker; u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ postcopy_register_shared_ufd(&u->postcopy_fd); return 0; -- cgit v1.2.3-55-g7522 From c639187e3342cb14e100d14ce4854444f7ae98d5 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:19 +0000 Subject: vhost-user: Add VHOST_USER_POSTCOPY_END message This message is sent just before the end of postcopy to get the client to stop using userfault since we wont respond to any more requests. It should close userfaultfd so that any other pages get mapped to the backing file automatically by the kernel, since at this point we know we've received everything. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 23 +++++++++++++++++++++++ contrib/libvhost-user/libvhost-user.h | 1 + docs/interop/vhost-user.txt | 12 ++++++++++++ hw/virtio/vhost-user.c | 1 + 4 files changed, 37 insertions(+) (limited to 'hw') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 5feed52098..504ff5ea59 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -99,6 +99,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_CONFIG), REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_POSTCOPY_LISTEN), + REQ(VHOST_USER_POSTCOPY_END), REQ(VHOST_USER_MAX), }; #undef REQ @@ -1094,6 +1095,26 @@ vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) vmsg->payload.u64 = 0; /* Success */ return true; } + +static bool +vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("%s: Entry\n", __func__); + dev->postcopy_listening = false; + if (dev->postcopy_ufd > 0) { + close(dev->postcopy_ufd); + dev->postcopy_ufd = -1; + DPRINT("%s: Done close\n", __func__); + } + + vmsg->fd_num = 0; + vmsg->payload.u64 = 0; + vmsg->size = sizeof(vmsg->payload.u64); + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; + DPRINT("%s: exit\n", __func__); + return true; +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1169,6 +1190,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_postcopy_advise(dev, vmsg); case VHOST_USER_POSTCOPY_LISTEN: return vu_set_postcopy_listen(dev, vmsg); + case VHOST_USER_POSTCOPY_END: + return vu_set_postcopy_end(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index ed505cf0c1..79f7a53ee8 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -87,6 +87,7 @@ typedef enum VhostUserRequest { VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, VHOST_USER_MAX } VhostUserRequest; diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index e295ef12ca..c058c407df 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -729,6 +729,18 @@ Master message types This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported. + * VHOST_USER_POSTCOPY_END + Id: 30 + Slave payload: u64 + + Master advises that postcopy migration has now completed. The + slave must disable the userfaultfd. The response is an acknowledgement + only. + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, this message + is sent at the end of the migration, after VHOST_USER_POSTCOPY_LISTEN + was previously sent. + The value returned is an error indication; 0 is success. + Slave message types ------------------- diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index a785aefd3e..230f2f9d55 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -82,6 +82,7 @@ typedef enum VhostUserRequest { VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, VHOST_USER_MAX } VhostUserRequest; -- cgit v1.2.3-55-g7522 From 46343570c06e63b4499f619011df80f91349cd49 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:20 +0000 Subject: vhost+postcopy: Wire up POSTCOPY_END notify Wire up a call to VHOST_USER_POSTCOPY_END message to the vhost clients right before we ask the listener thread to shutdown. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 2 ++ hw/virtio/vhost-user.c | 34 ++++++++++++++++++++++++++++++++++ migration/postcopy-ram.c | 7 +++++++ migration/postcopy-ram.h | 1 + 4 files changed, 44 insertions(+) (limited to 'hw') diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index fe5e0ff856..857c495e65 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -7,6 +7,8 @@ vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c +vhost_user_postcopy_end_entry(void) "" +vhost_user_postcopy_end_exit(void) "" vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d" vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64 vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64 diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 230f2f9d55..44aea5c0a8 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1114,6 +1114,37 @@ static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) return 0; } +/* + * Called at the end of postcopy + */ +static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) +{ + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_END, + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + int ret; + struct vhost_user *u = dev->opaque; + + trace_vhost_user_postcopy_end_entry(); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_end to vhost"); + return -1; + } + + ret = process_message_reply(dev, &msg); + if (ret) { + error_setg(errp, "Failed to receive reply to postcopy_end"); + return ret; + } + postcopy_unregister_shared_ufd(&u->postcopy_fd); + u->postcopy_fd.handler = NULL; + + trace_vhost_user_postcopy_end_exit(); + + return 0; +} + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, void *opaque) { @@ -1139,6 +1170,9 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, case POSTCOPY_NOTIFY_INBOUND_LISTEN: return vhost_user_postcopy_listen(dev, pnd->errp); + case POSTCOPY_NOTIFY_INBOUND_END: + return vhost_user_postcopy_end(dev, pnd->errp); + default: /* We ignore notifications we don't know */ break; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 6894399d24..7d4396fe95 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -413,6 +413,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) trace_postcopy_ram_incoming_cleanup_entry(); if (mis->have_fault_thread) { + Error *local_err = NULL; + + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) { + error_report_err(local_err); + return -1; + } + if (qemu_ram_foreach_block(cleanup_range, mis)) { return -1; } diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 2c73d77a5c..d900d9c34f 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -132,6 +132,7 @@ enum PostcopyNotifyReason { POSTCOPY_NOTIFY_PROBE = 0, POSTCOPY_NOTIFY_INBOUND_ADVISE, POSTCOPY_NOTIFY_INBOUND_LISTEN, + POSTCOPY_NOTIFY_INBOUND_END, }; struct PostcopyNotifyData { -- cgit v1.2.3-55-g7522 From c1ece84e7c930a12eccff56c7482e4068ec00eb7 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:21 +0000 Subject: vhost: Huge page align and merge Align RAMBlocks to page size alignment, and adjust the merging code to deal with partial overlap due to that alignment. This is needed for postcopy so that we can place/fetch whole hugepages when under userfault. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 3 ++- hw/virtio/vhost.c | 66 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 11 deletions(-) (limited to 'hw') diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 857c495e65..1422ff03ab 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -3,7 +3,8 @@ # hw/virtio/vhost.c vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 -vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 +vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 +vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d8d0ef92e1..250f886acb 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -522,10 +522,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, uint64_t mrs_gpa = section->offset_within_address_space; uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + section->offset_within_region; + RAMBlock *mrs_rb = section->mr->ram_block; + size_t mrs_page = qemu_ram_pagesize(mrs_rb); trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, mrs_host); + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + uint64_t alignage = mrs_host & (mrs_page - 1); + if (alignage) { + mrs_host -= alignage; + mrs_size += alignage; + mrs_gpa -= alignage; + } + /* Now align the size up to a page boundary */ + alignage = mrs_size & (mrs_page - 1); + if (alignage) { + mrs_size += mrs_page - alignage; + } + trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + if (dev->n_tmp_sections) { /* Since we already have at least one section, lets see if * this extends it; since we're scanning in order, we only @@ -542,18 +560,46 @@ static void vhost_region_add_section(struct vhost_dev *dev, prev_sec->offset_within_region; uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); - if (prev_gpa_end + 1 == mrs_gpa && - prev_host_end + 1 == mrs_host && - section->mr == prev_sec->mr && - (!dev->vhost_ops->vhost_backend_can_merge || - dev->vhost_ops->vhost_backend_can_merge(dev, + if (mrs_gpa <= (prev_gpa_end + 1)) { + /* OK, looks like overlapping/intersecting - it's possible that + * the rounding to page sizes has made them overlap, but they should + * match up in the same RAMBlock if they do. + */ + if (mrs_gpa < prev_gpa_start) { + error_report("%s:Section rounded to %"PRIx64 + " prior to previous %"PRIx64, + __func__, mrs_gpa, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } + /* Offset from the start of the previous GPA to this GPA */ + size_t offset = mrs_gpa - prev_gpa_start; + + if (prev_host_start + offset == mrs_host && + section->mr == prev_sec->mr && + (!dev->vhost_ops->vhost_backend_can_merge || + dev->vhost_ops->vhost_backend_can_merge(dev, mrs_host, mrs_size, prev_host_start, prev_size))) { - /* The two sections abut */ - need_add = false; - prev_sec->size = int128_add(prev_sec->size, section->size); - trace_vhost_region_add_section_abut(section->mr->name, - mrs_size + prev_size); + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); + need_add = false; + prev_sec->offset_within_address_space = + MIN(prev_gpa_start, mrs_gpa); + prev_sec->offset_within_region = + MIN(prev_host_start, mrs_host) - + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, + mrs_host)); + trace_vhost_region_add_section_merge(section->mr->name, + int128_get64(prev_sec->size), + prev_sec->offset_within_address_space, + prev_sec->offset_within_region); + } else { + error_report("%s: Overlapping but not coherent sections " + "at %"PRIx64, + __func__, mrs_gpa); + return; + } } } -- cgit v1.2.3-55-g7522