diff options
author | Peter Maydell | 2018-03-20 16:48:34 +0100 |
---|---|---|
committer | Peter Maydell | 2018-03-20 16:48:34 +0100 |
commit | ed627b2ad37469eeba9e9ed5fecfe315df9ecc60 (patch) | |
tree | aca1c6bddbaa61ffe2d029b123539fe20e6ecddc /hw | |
parent | Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request'... (diff) | |
parent | postcopy shared docs (diff) | |
download | qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.tar.gz qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.tar.xz qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.zip |
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio,vhost,pci,pc: features, cleanups
SRAT tables for DIMM devices
new virtio net flags for speed/duplex
post-copy migration support in vhost
cleanups in pci
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# gpg: Signature made Tue 20 Mar 2018 14:40:43 GMT
# gpg: using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* remotes/mst/tags/for_upstream: (51 commits)
postcopy shared docs
libvhost-user: Claim support for postcopy
postcopy: Allow shared memory
vhost: Huge page align and merge
vhost+postcopy: Wire up POSTCOPY_END notify
vhost-user: Add VHOST_USER_POSTCOPY_END message
libvhost-user: mprotect & madvises for postcopy
vhost+postcopy: Call wakeups
vhost+postcopy: Add vhost waker
postcopy: postcopy_notify_shared_wake
postcopy: helper for waking shared
vhost+postcopy: Resolve client address
postcopy-ram: add a stub for postcopy_request_shared_page
vhost+postcopy: Helper to send requests to source for shared pages
vhost+postcopy: Stash RAMBlock and offset
vhost+postcopy: Send address back to qemu
libvhost-user+postcopy: Register new regions with the ufd
migration/ram: ramblock_recv_bitmap_test_byte_offset
postcopy+vhost-user: Split set_mem_table for postcopy
vhost+postcopy: Transmit 'listen' to slave
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
# Conflicts:
# scripts/update-linux-headers.sh
Diffstat (limited to 'hw')
-rw-r--r-- | hw/acpi/aml-build.c | 140 | ||||
-rw-r--r-- | hw/arm/virt-acpi-build.c | 39 | ||||
-rw-r--r-- | hw/i386/acpi-build.c | 252 | ||||
-rw-r--r-- | hw/isa/apm.c | 1 | ||||
-rw-r--r-- | hw/mem/pc-dimm.c | 91 | ||||
-rw-r--r-- | hw/net/virtio-net.c | 81 | ||||
-rw-r--r-- | hw/pci/pci.c | 14 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 3 | ||||
-rw-r--r-- | hw/virtio/trace-events | 16 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 411 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 66 |
11 files changed, 855 insertions, 259 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 36a6cc450e..3fa557cea1 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -258,6 +258,22 @@ static void build_append_int(GArray *table, uint64_t value) } } +/* Generic Address Structure (GAS) + * ACPI 2.0/3.0: 5.2.3.1 Generic Address Structure + * 2.0 compat note: + * @access_width must be 0, see ACPI 2.0:Table 5-1 + */ +void build_append_gas(GArray *table, AmlAddressSpace as, + uint8_t bit_width, uint8_t bit_offset, + uint8_t access_width, uint64_t address) +{ + build_append_int_noprefix(table, as, 1); + build_append_int_noprefix(table, bit_width, 1); + build_append_int_noprefix(table, bit_offset, 1); + build_append_int_noprefix(table, access_width, 1); + build_append_int_noprefix(table, address, 8); +} + /* * Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword, * and return the offset to 0x00000000 for runtime patching. @@ -1662,3 +1678,127 @@ void build_slit(GArray *table_data, BIOSLinker *linker) "SLIT", table_data->len - slit_start, 1, NULL, NULL); } + +/* build rev1/rev3/rev5.1 FADT */ +void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id) +{ + int off; + int fadt_start = tbl->len; + + acpi_data_push(tbl, sizeof(AcpiTableHeader)); + + /* FACS address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */ + if (f->facs_tbl_offset) { /* don't patch if not supported by platform */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 4, + ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); + } + + /* DSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 4); /* DSDT */ + if (f->dsdt_tbl_offset) { /* don't patch if not supported by platform */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 4, + ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); + } + + /* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */ + build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1); + /* Preferred_PM_Profile */ + build_append_int_noprefix(tbl, 0 /* Unspecified */, 1); + build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */ + build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */ + build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */ + build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */ + build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */ + /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */ + build_append_int_noprefix(tbl, 0, 1); + build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */ + build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */ + build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */ + build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */ + build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */ + /* PM1_EVT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1); + /* PM1_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */ + build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */ + /* GPE0_BLK_LEN */ + build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1); + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */ + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */ + build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */ + build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */ + build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */ + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */ + build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */ + build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */ + build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */ + build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */ + build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */ + build_append_int_noprefix(tbl, 0, 1); /* Reserved */ + build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ + + if (f->rev == 1) { + goto build_hdr; + } + + build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ + build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ + /* Since ACPI 5.1 */ + if ((f->rev >= 6) || ((f->rev == 5) && f->minor_ver > 0)) { + build_append_int_noprefix(tbl, f->arm_boot_arch, 2); /* ARM_BOOT_ARCH */ + /* FADT Minor Version */ + build_append_int_noprefix(tbl, f->minor_ver, 1); + } else { + build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */ + } + build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ + + /* XDSDT address to be filled by Guest linker at runtime */ + off = tbl->len; + build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */ + if (f->xdsdt_tbl_offset) { + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, off, 8, + ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); + } + + build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */ + /* X_PM1b_EVT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */ + /* X_PM1b_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + /* X_PM2_CNT_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */ + build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ + + if (f->rev <= 4) { + goto build_hdr; + } + + /* SLEEP_CONTROL_REG */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + /* SLEEP_STATUS_REG */ + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + + /* TODO: extra fields need to be added to support revisions above rev5 */ + assert(f->rev == 5); + +build_hdr: + build_header(linker, tbl, (void *)(tbl->data + fadt_start), + "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); +} diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index f7fa795278..c7c6a57ec5 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -651,42 +651,33 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } /* FADT */ -static void build_fadt(GArray *table_data, BIOSLinker *linker, - VirtMachineState *vms, unsigned dsdt_tbl_offset) +static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker, + VirtMachineState *vms, unsigned dsdt_tbl_offset) { - int fadt_start = table_data->len; - AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; - uint16_t bootflags; + /* ACPI v5.1 */ + AcpiFadtData fadt = { + .rev = 5, + .minor_ver = 1, + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, + .xdsdt_tbl_offset = &dsdt_tbl_offset, + }; switch (vms->psci_conduit) { case QEMU_PSCI_CONDUIT_DISABLED: - bootflags = 0; + fadt.arm_boot_arch = 0; break; case QEMU_PSCI_CONDUIT_HVC: - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT | ACPI_FADT_ARM_PSCI_USE_HVC; + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT | + ACPI_FADT_ARM_PSCI_USE_HVC; break; case QEMU_PSCI_CONDUIT_SMC: - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT; + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT; break; default: g_assert_not_reached(); } - /* Hardware Reduced = 1 and use PSCI 0.2+ */ - fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI); - fadt->arm_boot_flags = cpu_to_le16(bootflags); - - /* ACPI v5.1 (fadt->revision.fadt->minor_revision) */ - fadt->minor_revision = 0x1; - - /* DSDT address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); - - build_header(linker, table_data, (void *)(table_data->data + fadt_start), - "FACP", table_data->len - fadt_start, 5, NULL, NULL); + build_fadt(table_data, linker, &fadt, NULL, NULL); } /* DSDT */ @@ -761,7 +752,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */ acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, vms, dsdt); + build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a66fb2dcd2..3cf2a1679c 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -91,17 +91,11 @@ typedef struct AcpiMcfgInfo { } AcpiMcfgInfo; typedef struct AcpiPmInfo { - bool force_rev1_fadt; bool s3_disabled; bool s4_disabled; bool pcihp_bridge_en; uint8_t s4_val; - uint16_t sci_int; - uint8_t acpi_enable_cmd; - uint8_t acpi_disable_cmd; - uint32_t gpe0_blk; - uint32_t gpe0_blk_len; - uint32_t io_base; + AcpiFadtData fadt; uint16_t cpu_hp_io_base; uint16_t pcihp_io_base; uint16_t pcihp_io_len; @@ -124,21 +118,59 @@ typedef struct AcpiBuildPciBusHotplugState { bool pcihp_bridge_en; } AcpiBuildPciBusHotplugState; +static void init_common_fadt_data(Object *o, AcpiFadtData *data) +{ + uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL); + AmlAddressSpace as = AML_AS_SYSTEM_IO; + AcpiFadtData fadt = { + .rev = 3, + .flags = + (1 << ACPI_FADT_F_WBINVD) | + (1 << ACPI_FADT_F_PROC_C1) | + (1 << ACPI_FADT_F_SLP_BUTTON) | + (1 << ACPI_FADT_F_RTC_S4) | + (1 << ACPI_FADT_F_USE_PLATFORM_CLOCK) | + /* APIC destination mode ("Flat Logical") has an upper limit of 8 + * CPUs for more than 8 CPUs, "Clustered Logical" mode has to be + * used + */ + ((max_cpus > 8) ? (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0), + .int_model = 1 /* Multiple APIC */, + .rtc_century = RTC_CENTURY, + .plvl2_lat = 0xfff /* C2 state not supported */, + .plvl3_lat = 0xfff /* C3 state not supported */, + .smi_cmd = ACPI_PORT_SMI_CMD, + .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL), + .acpi_enable_cmd = + object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL), + .acpi_disable_cmd = + object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL), + .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io }, + .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8, + .address = io + 0x04 }, + .pm_tmr = { .space_id = as, .bit_width = 4 * 8, .address = io + 0x08 }, + .gpe0_blk = { .space_id = as, .bit_width = + object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK_LEN, NULL) * 8, + .address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL) + }, + }; + *data = fadt; +} + static void acpi_get_pm_info(AcpiPmInfo *pm) { Object *piix = piix4_pm_find(); Object *lpc = ich9_lpc_find(); - Object *obj = NULL; + Object *obj = piix ? piix : lpc; QObject *o; - - pm->force_rev1_fadt = false; pm->cpu_hp_io_base = 0; pm->pcihp_io_base = 0; pm->pcihp_io_len = 0; + + init_common_fadt_data(obj, &pm->fadt); if (piix) { /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ - pm->force_rev1_fadt = true; - obj = piix; + pm->fadt.rev = 1; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; pm->pcihp_io_base = object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); @@ -146,11 +178,19 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { - obj = lpc; + struct AcpiGenericAddress r = { .space_id = AML_AS_SYSTEM_IO, + .bit_width = 8, .address = ICH9_RST_CNT_IOPORT }; + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; } assert(obj); + /* The above need not be conditional on machine type because the reset port + * happens to be the same on PIIX (pc) and ICH9 (q35). */ + QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); + /* Fill in optional s3/s4 related properties */ o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); if (o) { @@ -174,22 +214,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) } qobject_decref(o); - /* Fill in mandatory properties */ - pm->sci_int = object_property_get_uint(obj, ACPI_PM_PROP_SCI_INT, NULL); - - pm->acpi_enable_cmd = object_property_get_uint(obj, - ACPI_PM_PROP_ACPI_ENABLE_CMD, - NULL); - pm->acpi_disable_cmd = - object_property_get_uint(obj, - ACPI_PM_PROP_ACPI_DISABLE_CMD, - NULL); - pm->io_base = object_property_get_uint(obj, ACPI_PM_PROP_PM_IO_BASE, - NULL); - pm->gpe0_blk = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK, - NULL); - pm->gpe0_blk_len = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK_LEN, - NULL); pm->pcihp_bridge_en = object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", NULL); @@ -257,8 +281,6 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64) NULL)); } -#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ - static void acpi_align_size(GArray *blob, unsigned align) { /* Align size to multiple of given size. This reduces the chance @@ -276,106 +298,6 @@ build_facs(GArray *table_data, BIOSLinker *linker) facs->length = cpu_to_le32(sizeof(*facs)); } -/* Load chipset information in FADT */ -static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiPmInfo *pm) -{ - fadt->model = 1; - fadt->reserved1 = 0; - fadt->sci_int = cpu_to_le16(pm->sci_int); - fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD); - fadt->acpi_enable = pm->acpi_enable_cmd; - fadt->acpi_disable = pm->acpi_disable_cmd; - /* EVT, CNT, TMR offset matches hw/acpi/core.c */ - fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base); - fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04); - fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08); - fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk); - /* EVT, CNT, TMR length matches hw/acpi/core.c */ - fadt->pm1_evt_len = 4; - fadt->pm1_cnt_len = 2; - fadt->pm_tmr_len = 4; - fadt->gpe0_blk_len = pm->gpe0_blk_len; - fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */ - fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */ - fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) | - (1 << ACPI_FADT_F_PROC_C1) | - (1 << ACPI_FADT_F_SLP_BUTTON) | - (1 << ACPI_FADT_F_RTC_S4)); - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK); - /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs - * For more than 8 CPUs, "Clustered Logical" mode has to be used - */ - if (max_cpus > 8) { - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL); - } - fadt->century = RTC_CENTURY; - if (pm->force_rev1_fadt) { - return; - } - - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_RESET_REG_SUP); - fadt->reset_value = 0xf; - fadt->reset_register.space_id = AML_SYSTEM_IO; - fadt->reset_register.bit_width = 8; - fadt->reset_register.address = cpu_to_le64(ICH9_RST_CNT_IOPORT); - /* The above need not be conditional on machine type because the reset port - * happens to be the same on PIIX (pc) and ICH9 (q35). */ - QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); - - fadt->xpm1a_event_block.space_id = AML_SYSTEM_IO; - fadt->xpm1a_event_block.bit_width = fadt->pm1_evt_len * 8; - fadt->xpm1a_event_block.address = cpu_to_le64(pm->io_base); - - fadt->xpm1a_control_block.space_id = AML_SYSTEM_IO; - fadt->xpm1a_control_block.bit_width = fadt->pm1_cnt_len * 8; - fadt->xpm1a_control_block.address = cpu_to_le64(pm->io_base + 0x4); - - fadt->xpm_timer_block.space_id = AML_SYSTEM_IO; - fadt->xpm_timer_block.bit_width = fadt->pm_tmr_len * 8; - fadt->xpm_timer_block.address = cpu_to_le64(pm->io_base + 0x8); - - fadt->xgpe0_block.space_id = AML_SYSTEM_IO; - fadt->xgpe0_block.bit_width = pm->gpe0_blk_len * 8; - fadt->xgpe0_block.address = cpu_to_le64(pm->gpe0_blk); -} - - -/* FADT */ -static void -build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, - unsigned facs_tbl_offset, unsigned dsdt_tbl_offset, - const char *oem_id, const char *oem_table_id) -{ - AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt)); - unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data; - unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; - int fadt_size = sizeof(*fadt); - int rev = 3; - - /* FACS address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl), - ACPI_BUILD_TABLE_FILE, facs_tbl_offset); - - /* DSDT address to be filled by Guest linker */ - fadt_setup(fadt, pm); - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); - if (pm->force_rev1_fadt) { - rev = 1; - fadt_size = offsetof(typeof(*fadt), reset_register); - } else { - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); - } - - build_header(linker, table_data, - (void *)fadt, "FACP", fadt_size, rev, oem_id, oem_table_id); -} - void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry) { @@ -2053,7 +1975,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); crs = aml_resource_template(); aml_append(crs, - aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len) + aml_io( + AML_DECODE16, + pm->fadt.gpe0_blk.address, + pm->fadt.gpe0_blk.address, + 1, + pm->fadt.gpe0_blk.bit_width / 8) ); aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); @@ -2323,6 +2250,55 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) #define HOLE_640K_START (640 * 1024) #define HOLE_640K_END (1024 * 1024) +static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base, + uint64_t len, int default_node) +{ + MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list(); + MemoryDeviceInfoList *info; + MemoryDeviceInfo *mi; + PCDIMMDeviceInfo *di; + uint64_t end = base + len, cur, size; + bool is_nvdimm; + AcpiSratMemoryAffinity *numamem; + MemoryAffinityFlags flags; + + for (cur = base, info = info_list; + cur < end; + cur += size, info = info->next) { + numamem = acpi_data_push(table_data, sizeof *numamem); + + if (!info) { + build_srat_memory(numamem, cur, end - cur, default_node, + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + break; + } + + mi = info->value; + is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM); + di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data; + + if (cur < di->addr) { + build_srat_memory(numamem, cur, di->addr - cur, default_node, + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + numamem = acpi_data_push(table_data, sizeof *numamem); + } + + size = di->size; + + flags = MEM_AFFINITY_ENABLED; + if (di->hotpluggable) { + flags |= MEM_AFFINITY_HOTPLUGGABLE; + } + if (is_nvdimm) { + flags |= MEM_AFFINITY_NON_VOLATILE; + } + + build_srat_memory(numamem, di->addr, size, di->node, flags); + } + + qapi_free_MemoryDeviceInfoList(info_list); +} + static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { @@ -2434,10 +2410,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * providing _PXM method if necessary. */ if (hotplugabble_address_space_size) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, pcms->numa_nodes - 1, - MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base, + hotplugabble_address_space_size, + pcms->numa_nodes - 1); } build_header(linker, table_data, @@ -2700,7 +2675,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) /* ACPI tables pointed to by RSDT */ fadt = tables_blob->len; acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, &pm, facs, dsdt, + pm.fadt.facs_tbl_offset = &facs; + pm.fadt.dsdt_tbl_offset = &dsdt; + pm.fadt.xdsdt_tbl_offset = &dsdt; + build_fadt(tables_blob, tables->linker, &pm.fadt, slic_oem.id, slic_oem.table_id); aml_len += tables_blob->len - fadt; diff --git a/hw/isa/apm.c b/hw/isa/apm.c index e232b0da03..c3101ef52f 100644 --- a/hw/isa/apm.c +++ b/hw/isa/apm.c @@ -34,7 +34,6 @@ #endif /* fixed I/O location */ -#define APM_CNT_IOPORT 0xb2 #define APM_STS_IOPORT 0xb3 static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val, diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 6e74b61cb6..51350d9c2d 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "hw/mem/pc-dimm.h" +#include "hw/mem/nvdimm.h" #include "qapi/error.h" #include "qemu/config-file.h" #include "qapi/visitor.h" @@ -162,45 +163,6 @@ uint64_t get_plugged_memory_size(void) return pc_existing_dimms_capacity(&error_abort); } -int qmp_pc_dimm_device_list(Object *obj, void *opaque) -{ - MemoryDeviceInfoList ***prev = opaque; - - if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { - DeviceState *dev = DEVICE(obj); - - if (dev->realized) { - MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); - MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); - PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); - DeviceClass *dc = DEVICE_GET_CLASS(obj); - PCDIMMDevice *dimm = PC_DIMM(obj); - - if (dev->id) { - di->has_id = true; - di->id = g_strdup(dev->id); - } - di->hotplugged = dev->hotplugged; - di->hotpluggable = dc->hotpluggable; - di->addr = dimm->addr; - di->slot = dimm->slot; - di->node = dimm->node; - di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, - NULL); - di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); - - info->u.dimm.data = di; - elem->value = info; - elem->next = NULL; - **prev = elem; - *prev = &elem->next; - } - } - - object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); - return 0; -} - static int pc_dimm_slot2bitmap(Object *obj, void *opaque) { unsigned long *bitmap = opaque; @@ -276,6 +238,57 @@ static int pc_dimm_built_list(Object *obj, void *opaque) return 0; } +MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) +{ + GSList *dimms = NULL, *item; + MemoryDeviceInfoList *list = NULL, *prev = NULL; + + object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &dimms); + + for (item = dimms; item; item = g_slist_next(item)) { + PCDIMMDevice *dimm = PC_DIMM(item->data); + Object *obj = OBJECT(dimm); + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + bool is_nvdimm = object_dynamic_cast(obj, TYPE_NVDIMM); + DeviceClass *dc = DEVICE_GET_CLASS(obj); + DeviceState *dev = DEVICE(obj); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + if (!is_nvdimm) { + info->u.dimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_DIMM; + } else { + info->u.nvdimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM; + } + elem->value = info; + elem->next = NULL; + if (prev) { + prev->next = elem; + } else { + list = elem; + } + prev = elem; + } + + g_slist_free(dimms); + + return list; +} + uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, uint64_t *hint, uint64_t align, uint64_t size, diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 188744e17d..67ad38cfe4 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -26,6 +26,7 @@ #include "qapi/qapi-events-net.h" #include "hw/virtio/virtio-access.h" #include "migration/misc.h" +#include "standard-headers/linux/ethtool.h" #define VIRTIO_NET_VM_VERSION 11 @@ -48,19 +49,21 @@ (offsetof(container, field) + sizeof(((container *)0)->field)) typedef struct VirtIOFeature { - uint32_t flags; + uint64_t flags; size_t end; } VirtIOFeature; static VirtIOFeature feature_sizes[] = { - {.flags = 1 << VIRTIO_NET_F_MAC, + {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, - {.flags = 1 << VIRTIO_NET_F_STATUS, + {.flags = 1ULL << VIRTIO_NET_F_STATUS, .end = endof(struct virtio_net_config, status)}, - {.flags = 1 << VIRTIO_NET_F_MQ, + {.flags = 1ULL << VIRTIO_NET_F_MQ, .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, - {.flags = 1 << VIRTIO_NET_F_MTU, + {.flags = 1ULL << VIRTIO_NET_F_MTU, .end = endof(struct virtio_net_config, mtu)}, + {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, + .end = endof(struct virtio_net_config, duplex)}, {} }; @@ -89,6 +92,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); memcpy(netcfg.mac, n->mac, ETH_ALEN); + virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); + netcfg.duplex = n->net_conf.duplex; memcpy(config, &netcfg, n->config_size); } @@ -1938,7 +1943,26 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) int i; if (n->net_conf.mtu) { - n->host_features |= (0x1 << VIRTIO_NET_F_MTU); + n->host_features |= (1ULL << VIRTIO_NET_F_MTU); + } + + if (n->net_conf.duplex_str) { + if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { + n->net_conf.duplex = DUPLEX_HALF; + } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { + n->net_conf.duplex = DUPLEX_FULL; + } else { + error_setg(errp, "'duplex' must be 'half' or 'full'"); + } + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); + } else { + n->net_conf.duplex = DUPLEX_UNKNOWN; + } + + if (n->net_conf.speed < SPEED_UNKNOWN) { + error_setg(errp, "'speed' must be between 0 and INT_MAX"); + } else if (n->net_conf.speed >= 0) { + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); } virtio_net_set_config_size(n, n->host_features); @@ -2109,45 +2133,46 @@ static const VMStateDescription vmstate_virtio_net = { }; static Property virtio_net_properties[] = { - DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), - DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, + DEFINE_PROP_BIT64("csum", VirtIONet, host_features, + VIRTIO_NET_F_CSUM, true), + DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, VIRTIO_NET_F_GUEST_CSUM, true), - DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), - DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features, + DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), + DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, VIRTIO_NET_F_GUEST_TSO4, true), - DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, VIRTIO_NET_F_GUEST_TSO6, true), - DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, VIRTIO_NET_F_GUEST_ECN, true), - DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, VIRTIO_NET_F_GUEST_UFO, true), - DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features, + DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, VIRTIO_NET_F_GUEST_ANNOUNCE, true), - DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, VIRTIO_NET_F_HOST_TSO4, true), - DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, VIRTIO_NET_F_HOST_TSO6, true), - DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, VIRTIO_NET_F_HOST_ECN, true), - DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features, + DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, VIRTIO_NET_F_HOST_UFO, true), - DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features, + DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, VIRTIO_NET_F_MRG_RXBUF, true), - DEFINE_PROP_BIT("status", VirtIONet, host_features, + DEFINE_PROP_BIT64("status", VirtIONet, host_features, VIRTIO_NET_F_STATUS, true), - DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, VIRTIO_NET_F_CTRL_VQ, true), - DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, VIRTIO_NET_F_CTRL_RX, true), - DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, VIRTIO_NET_F_CTRL_VLAN, true), - DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, VIRTIO_NET_F_CTRL_RX_EXTRA, true), - DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, VIRTIO_NET_F_CTRL_MAC_ADDR, true), - DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features, + DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), - DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), + DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, TX_TIMER_INTERVAL), @@ -2160,6 +2185,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, true), + DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), + DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 67a3f72bd6..80bc45930d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2048,18 +2048,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } -static void pci_default_realize(PCIDevice *dev, Error **errp) -{ - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); - - if (pc->init) { - if (pc->init(dev) < 0) { - error_setg(errp, "Device initialization failed"); - return; - } - } -} - PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction, const char *name) { @@ -2532,13 +2520,11 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev) static void pci_device_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); - PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); k->realize = pci_qdev_realize; k->unrealize = pci_qdev_unrealize; k->bus_type = TYPE_PCI_BUS; k->props = pci_props; - pc->realize = pci_default_realize; } static void pci_device_class_base_init(ObjectClass *klass, void *data) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index fba76abee2..2c0be8c898 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -722,8 +722,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) } if (hotplug_lmb_start) { - MemoryDeviceInfoList **prev = &dimms; - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); + dimms = qmp_pc_dimm_device_list(); } /* ibm,dynamic-memory */ diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 742ff0f90b..1422ff03ab 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -3,9 +3,23 @@ # hw/virtio/vhost.c vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 -vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 +vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 +vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_section(const char *name, int r) "%s:%d" +# hw/virtio/vhost-user.c +vhost_user_postcopy_end_entry(void) "" +vhost_user_postcopy_end_exit(void) "" +vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d" +vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64 +vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64 +vhost_user_postcopy_listen(void) "" +vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" +vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 +vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 +vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 +vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 + # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 41ff5cff41..44aea5c0a8 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -18,11 +18,15 @@ #include "qemu/error-report.h" #include "qemu/sockets.h" #include "sysemu/cryptodev.h" +#include "migration/migration.h" +#include "migration/postcopy-ram.h" +#include "trace.h" #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/un.h> #include <linux/vhost.h> +#include <linux/userfaultfd.h> #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -41,7 +45,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, - + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_MAX }; @@ -76,6 +80,9 @@ typedef enum VhostUserRequest { VHOST_USER_SET_CONFIG = 25, VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, VHOST_USER_MAX } VhostUserRequest; @@ -164,8 +171,23 @@ static VhostUserMsg m __attribute__ ((unused)); #define VHOST_USER_VERSION (0x1) struct vhost_user { + struct vhost_dev *dev; CharBackend *chr; int slave_fd; + NotifierWithReturn postcopy_notifier; + struct PostCopyFD postcopy_fd; + uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; + /* Length of the region_rb and region_rb_offset arrays */ + size_t region_rb_len; + /* RAMBlock associated with a given region */ + RAMBlock **region_rb; + /* The offset from the start of the RAMBlock to the start of the + * vhost region. + */ + ram_addr_t *region_rb_offset; + + /* True once we've entered postcopy_listen */ + bool postcopy_listen; }; static bool ioeventfd_enabled(void) @@ -330,14 +352,167 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, return 0; } +static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, + struct vhost_memory *mem) +{ + struct vhost_user *u = dev->opaque; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int i, fd; + size_t fd_num = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + VhostUserMsg msg_reply; + int region_i, msg_i; + + VhostUserMsg msg = { + .hdr.request = VHOST_USER_SET_MEM_TABLE, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + if (u->region_rb_len < dev->mem->nregions) { + u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); + u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, + dev->mem->nregions); + memset(&(u->region_rb[u->region_rb_len]), '\0', + sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); + memset(&(u->region_rb_offset[u->region_rb_len]), '\0', + sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); + u->region_rb_len = dev->mem->nregions; + } + + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t offset; + MemoryRegion *mr; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, + &offset); + fd = memory_region_get_fd(mr); + if (fd > 0) { + trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, + reg->memory_size, + reg->guest_phys_addr, + reg->userspace_addr, offset); + u->region_rb_offset[i] = offset; + u->region_rb[i] = mr->ram_block; + msg.payload.memory.regions[fd_num].userspace_addr = + reg->userspace_addr; + msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; + msg.payload.memory.regions[fd_num].guest_phys_addr = + reg->guest_phys_addr; + msg.payload.memory.regions[fd_num].mmap_offset = offset; + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); + fds[fd_num++] = fd; + } else { + u->region_rb_offset[i] = 0; + u->region_rb[i] = NULL; + } + } + + msg.payload.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map, " + "consider using -object memory-backend-file share=on"); + return -1; + } + + msg.hdr.size = sizeof(msg.payload.memory.nregions); + msg.hdr.size += sizeof(msg.payload.memory.padding); + msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); + + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } + + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + /* We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); + + /* They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + /* Now we've registered this with the postcopy code, we ack to the client, + * because now we're in the position to be able to deal with any faults + * it generates. + */ + /* TODO: Use this for failure cases as well with a bad value */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (reply_supported) { + return process_message_reply(dev, &msg); + } + + return 0; +} + static int vhost_user_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { + struct vhost_user *u = dev->opaque; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; + bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); + VHOST_USER_PROTOCOL_F_REPLY_ACK) && + !do_postcopy; + + if (do_postcopy) { + /* Postcopy has enough differences that it's best done in it's own + * version + */ + return vhost_user_set_mem_table_postcopy(dev, mem); + } VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_MEM_TABLE, @@ -362,9 +537,11 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, error_report("Failed preparing vhost-user memory table msg"); return -1; } - msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; + msg.payload.memory.regions[fd_num].userspace_addr = + reg->userspace_addr; msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; - msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; + msg.payload.memory.regions[fd_num].guest_phys_addr = + reg->guest_phys_addr; msg.payload.memory.regions[fd_num].mmap_offset = offset; fds[fd_num++] = fd; } @@ -791,6 +968,219 @@ out: return ret; } +/* + * Called back from the postcopy fault thread when a fault is received on our + * ufd. + * TODO: This is Linux specific + */ +static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, + void *ufd) +{ + struct vhost_dev *dev = pcfd->data; + struct vhost_user *u = dev->opaque; + struct uffd_msg *msg = ufd; + uint64_t faultaddr = msg->arg.pagefault.address; + RAMBlock *rb = NULL; + uint64_t rb_offset; + int i; + + trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, + dev->mem->nregions); + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { + trace_vhost_user_postcopy_fault_handler_loop(i, + u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); + if (faultaddr >= u->postcopy_client_bases[i]) { + /* Ofset of the fault address in the vhost region */ + uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; + if (region_offset < dev->mem->regions[i].memory_size) { + rb_offset = region_offset + u->region_rb_offset[i]; + trace_vhost_user_postcopy_fault_handler_found(i, + region_offset, rb_offset); + rb = u->region_rb[i]; + return postcopy_request_shared_page(pcfd, rb, faultaddr, + rb_offset); + } + } + } + error_report("%s: Failed to find region for fault %" PRIx64, + __func__, faultaddr); + return -1; +} + +static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, + uint64_t offset) +{ + struct vhost_dev *dev = pcfd->data; + struct vhost_user *u = dev->opaque; + int i; + + trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); + + if (!u) { + return 0; + } + /* Translate the offset into an address in the clients address space */ + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { + if (u->region_rb[i] == rb && + offset >= u->region_rb_offset[i] && + offset < (u->region_rb_offset[i] + + dev->mem->regions[i].memory_size)) { + uint64_t client_addr = (offset - u->region_rb_offset[i]) + + u->postcopy_client_bases[i]; + trace_vhost_user_postcopy_waker_found(client_addr); + return postcopy_wake_shared(pcfd, client_addr, rb); + } + } + + trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); + return 0; +} + +/* + * Called at the start of an inbound postcopy on reception of the + * 'advise' command. + */ +static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->chr; + int ufd; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_ADVISE, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_advise to vhost"); + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + error_setg(errp, "Failed to get postcopy_advise reply from vhost"); + return -1; + } + + if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { + error_setg(errp, "Unexpected msg type. Expected %d received %d", + VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); + return -1; + } + + if (msg.hdr.size) { + error_setg(errp, "Received bad msg size."); + return -1; + } + ufd = qemu_chr_fe_get_msgfd(chr); + if (ufd < 0) { + error_setg(errp, "%s: Failed to get ufd", __func__); + return -1; + } + fcntl(ufd, F_SETFL, O_NONBLOCK); + + /* register ufd with userfault thread */ + u->postcopy_fd.fd = ufd; + u->postcopy_fd.data = dev; + u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; + u->postcopy_fd.waker = vhost_user_postcopy_waker; + u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ + postcopy_register_shared_ufd(&u->postcopy_fd); + return 0; +} + +/* + * Called at the switch to postcopy on reception of the 'listen' command. + */ +static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_LISTEN, + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + u->postcopy_listen = true; + trace_vhost_user_postcopy_listen(); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_listen to vhost"); + return -1; + } + + ret = process_message_reply(dev, &msg); + if (ret) { + error_setg(errp, "Failed to receive reply to postcopy_listen"); + return ret; + } + + return 0; +} + +/* + * Called at the end of postcopy + */ +static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) +{ + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_END, + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + int ret; + struct vhost_user *u = dev->opaque; + + trace_vhost_user_postcopy_end_entry(); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_end to vhost"); + return -1; + } + + ret = process_message_reply(dev, &msg); + if (ret) { + error_setg(errp, "Failed to receive reply to postcopy_end"); + return ret; + } + postcopy_unregister_shared_ufd(&u->postcopy_fd); + u->postcopy_fd.handler = NULL; + + trace_vhost_user_postcopy_end_exit(); + + return 0; +} + +static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, + void *opaque) +{ + struct PostcopyNotifyData *pnd = opaque; + struct vhost_user *u = container_of(notifier, struct vhost_user, + postcopy_notifier); + struct vhost_dev *dev = u->dev; + + switch (pnd->reason) { + case POSTCOPY_NOTIFY_PROBE: + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_PAGEFAULT)) { + /* TODO: Get the device name into this error somehow */ + error_setg(pnd->errp, + "vhost-user backend not capable of postcopy"); + return -ENOENT; + } + break; + + case POSTCOPY_NOTIFY_INBOUND_ADVISE: + return vhost_user_postcopy_advise(dev, pnd->errp); + + case POSTCOPY_NOTIFY_INBOUND_LISTEN: + return vhost_user_postcopy_listen(dev, pnd->errp); + + case POSTCOPY_NOTIFY_INBOUND_END: + return vhost_user_postcopy_end(dev, pnd->errp); + + default: + /* We ignore notifications we don't know */ + break; + } + + return 0; +} + static int vhost_user_init(struct vhost_dev *dev, void *opaque) { uint64_t features, protocol_features; @@ -802,6 +1192,7 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) u = g_new0(struct vhost_user, 1); u->chr = opaque; u->slave_fd = -1; + u->dev = dev; dev->opaque = u; err = vhost_user_get_features(dev, &features); @@ -858,6 +1249,9 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) return err; } + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + postcopy_add_notifier(&u->postcopy_notifier); + return 0; } @@ -868,11 +1262,20 @@ static int vhost_user_cleanup(struct vhost_dev *dev) assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = dev->opaque; + if (u->postcopy_notifier.notify) { + postcopy_remove_notifier(&u->postcopy_notifier); + u->postcopy_notifier.notify = NULL; + } if (u->slave_fd >= 0) { qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); close(u->slave_fd); u->slave_fd = -1; } + g_free(u->region_rb); + u->region_rb = NULL; + g_free(u->region_rb_offset); + u->region_rb_offset = NULL; + u->region_rb_len = 0; g_free(u); dev->opaque = 0; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d8d0ef92e1..250f886acb 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -522,10 +522,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, uint64_t mrs_gpa = section->offset_within_address_space; uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + section->offset_within_region; + RAMBlock *mrs_rb = section->mr->ram_block; + size_t mrs_page = qemu_ram_pagesize(mrs_rb); trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, mrs_host); + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + uint64_t alignage = mrs_host & (mrs_page - 1); + if (alignage) { + mrs_host -= alignage; + mrs_size += alignage; + mrs_gpa -= alignage; + } + /* Now align the size up to a page boundary */ + alignage = mrs_size & (mrs_page - 1); + if (alignage) { + mrs_size += mrs_page - alignage; + } + trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + if (dev->n_tmp_sections) { /* Since we already have at least one section, lets see if * this extends it; since we're scanning in order, we only @@ -542,18 +560,46 @@ static void vhost_region_add_section(struct vhost_dev *dev, prev_sec->offset_within_region; uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); - if (prev_gpa_end + 1 == mrs_gpa && - prev_host_end + 1 == mrs_host && - section->mr == prev_sec->mr && - (!dev->vhost_ops->vhost_backend_can_merge || - dev->vhost_ops->vhost_backend_can_merge(dev, + if (mrs_gpa <= (prev_gpa_end + 1)) { + /* OK, looks like overlapping/intersecting - it's possible that + * the rounding to page sizes has made them overlap, but they should + * match up in the same RAMBlock if they do. + */ + if (mrs_gpa < prev_gpa_start) { + error_report("%s:Section rounded to %"PRIx64 + " prior to previous %"PRIx64, + __func__, mrs_gpa, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } + /* Offset from the start of the previous GPA to this GPA */ + size_t offset = mrs_gpa - prev_gpa_start; + + if (prev_host_start + offset == mrs_host && + section->mr == prev_sec->mr && + (!dev->vhost_ops->vhost_backend_can_merge || + dev->vhost_ops->vhost_backend_can_merge(dev, mrs_host, mrs_size, prev_host_start, prev_size))) { - /* The two sections abut */ - need_add = false; - prev_sec->size = int128_add(prev_sec->size, section->size); - trace_vhost_region_add_section_abut(section->mr->name, - mrs_size + prev_size); + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); + need_add = false; + prev_sec->offset_within_address_space = + MIN(prev_gpa_start, mrs_gpa); + prev_sec->offset_within_region = + MIN(prev_host_start, mrs_host) - + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, + mrs_host)); + trace_vhost_region_add_section_merge(section->mr->name, + int128_get64(prev_sec->size), + prev_sec->offset_within_address_space, + prev_sec->offset_within_region); + } else { + error_report("%s: Overlapping but not coherent sections " + "at %"PRIx64, + __func__, mrs_gpa); + return; + } } } |