summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
author Peter Maydell 2018-03-20 16:48:34 +0100
committer Peter Maydell 2018-03-20 16:48:34 +0100
commit ed627b2ad37469eeba9e9ed5fecfe315df9ecc60 (patch)
tree aca1c6bddbaa61ffe2d029b123539fe20e6ecddc /hw
parent Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request'... (diff)
parent postcopy shared docs (diff)
download qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.tar.gz
qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.tar.xz
qemu-ed627b2ad37469eeba9e9ed5fecfe315df9ecc60.zip
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio,vhost,pci,pc: features, cleanups

SRAT tables for DIMM devices
new virtio net flags for speed/duplex
post-copy migration support in vhost
cleanups in pci

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 20 Mar 2018 14:40:43 GMT
# gpg: using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (51 commits)
  postcopy shared docs
  libvhost-user: Claim support for postcopy
  postcopy: Allow shared memory
  vhost: Huge page align and merge
  vhost+postcopy: Wire up POSTCOPY_END notify
  vhost-user: Add VHOST_USER_POSTCOPY_END message
  libvhost-user: mprotect & madvises for postcopy
  vhost+postcopy: Call wakeups
  vhost+postcopy: Add vhost waker
  postcopy: postcopy_notify_shared_wake
  postcopy: helper for waking shared
  vhost+postcopy: Resolve client address
  postcopy-ram: add a stub for postcopy_request_shared_page
  vhost+postcopy: Helper to send requests to source for shared pages
  vhost+postcopy: Stash RAMBlock and offset
  vhost+postcopy: Send address back to qemu
  libvhost-user+postcopy: Register new regions with the ufd
  migration/ram: ramblock_recv_bitmap_test_byte_offset
  postcopy+vhost-user: Split set_mem_table for postcopy
  vhost+postcopy: Transmit 'listen' to slave
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
# scripts/update-linux-headers.sh
Diffstat (limited to 'hw')
-rw-r--r-- hw/acpi/aml-build.c 140
-rw-r--r-- hw/arm/virt-acpi-build.c 39
-rw-r--r-- hw/i386/acpi-build.c 252
-rw-r--r-- hw/isa/apm.c 1
-rw-r--r-- hw/mem/pc-dimm.c 91
-rw-r--r-- hw/net/virtio-net.c 81
-rw-r--r-- hw/pci/pci.c 14
-rw-r--r-- hw/ppc/spapr.c 3
-rw-r--r-- hw/virtio/trace-events 16
-rw-r--r-- hw/virtio/vhost-user.c 411
-rw-r--r-- hw/virtio/vhost.c 66
11 files changed, 855 insertions, 259 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 36a6cc450e..3fa557cea1 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -258,6 +258,22 @@ static void build_append_int(GArray *table, uint64_t value)
}
}
+/* Generic Address Structure (GAS)
+ * ACPI 2.0/3.0: 5.2.3.1 Generic Address Structure
+ * 2.0 compat note:
+ * @access_width must be 0, see ACPI 2.0:Table 5-1
+ */
+void build_append_gas(GArray *table, AmlAddressSpace as,
+ uint8_t bit_width, uint8_t bit_offset,
+ uint8_t access_width, uint64_t address)
+{
+ build_append_int_noprefix(table, as, 1);
+ build_append_int_noprefix(table, bit_width, 1);
+ build_append_int_noprefix(table, bit_offset, 1);
+ build_append_int_noprefix(table, access_width, 1);
+ build_append_int_noprefix(table, address, 8);
+}
+
/*
* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
* and return the offset to 0x00000000 for runtime patching.
@@ -1662,3 +1678,127 @@ void build_slit(GArray *table_data, BIOSLinker *linker)
"SLIT",
table_data->len - slit_start, 1, NULL, NULL);
}
+
+/* build rev1/rev3/rev5.1 FADT */
+void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
+ const char *oem_id, const char *oem_table_id)
+{
+ int off;
+ int fadt_start = tbl->len;
+
+ acpi_data_push(tbl, sizeof(AcpiTableHeader));
+
+ /* FACS address to be filled by Guest linker at runtime */
+ off = tbl->len;
+ build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */
+ if (f->facs_tbl_offset) { /* don't patch if not supported by platform */
+ bios_linker_loader_add_pointer(linker,
+ ACPI_BUILD_TABLE_FILE, off, 4,
+ ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset);
+ }
+
+ /* DSDT address to be filled by Guest linker at runtime */
+ off = tbl->len;
+ build_append_int_noprefix(tbl, 0, 4); /* DSDT */
+ if (f->dsdt_tbl_offset) { /* don't patch if not supported by platform */
+ bios_linker_loader_add_pointer(linker,
+ ACPI_BUILD_TABLE_FILE, off, 4,
+ ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset);
+ }
+
+ /* ACPI1.0: INT_MODEL, ACPI2.0+: Reserved */
+ build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1);
+ /* Preferred_PM_Profile */
+ build_append_int_noprefix(tbl, 0 /* Unspecified */, 1);
+ build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */
+ build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */
+ build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */
+ build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */
+ build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */
+ /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */
+ build_append_int_noprefix(tbl, 0, 1);
+ build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */
+ build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */
+ build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */
+ build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */
+ build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */
+ build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */
+ build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */
+ build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */
+ /* PM1_EVT_LEN */
+ build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1);
+ /* PM1_CNT_LEN */
+ build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1);
+ build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */
+ build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */
+ /* GPE0_BLK_LEN */
+ build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1);
+ build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */
+ build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */
+ build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */
+ build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */
+ build_append_int_noprefix(tbl, f->plvl3_lat, 2); /* P_LVL3_LAT */
+ build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */
+ build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */
+ build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */
+ build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */
+ build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */
+ build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */
+ build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */
+ build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */
+ build_append_int_noprefix(tbl, 0, 1); /* Reserved */
+ build_append_int_noprefix(tbl, f->flags, 4); /* Flags */
+
+ if (f->rev == 1) {
+ goto build_hdr;
+ }
+
+ build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */
+ build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */
+ /* Since ACPI 5.1 */
+ if ((f->rev >= 6) || ((f->rev == 5) && f->minor_ver > 0)) {
+ build_append_int_noprefix(tbl, f->arm_boot_arch, 2); /* ARM_BOOT_ARCH */
+ /* FADT Minor Version */
+ build_append_int_noprefix(tbl, f->minor_ver, 1);
+ } else {
+ build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */
+ }
+ build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */
+
+ /* XDSDT address to be filled by Guest linker at runtime */
+ off = tbl->len;
+ build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */
+ if (f->xdsdt_tbl_offset) {
+ bios_linker_loader_add_pointer(linker,
+ ACPI_BUILD_TABLE_FILE, off, 8,
+ ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset);
+ }
+
+ build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */
+ /* X_PM1b_EVT_BLK */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
+ build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */
+ /* X_PM1b_CNT_BLK */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
+ /* X_PM2_CNT_BLK */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
+ build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */
+ build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */
+
+ if (f->rev <= 4) {
+ goto build_hdr;
+ }
+
+ /* SLEEP_CONTROL_REG */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
+ /* SLEEP_STATUS_REG */
+ build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0);
+
+ /* TODO: extra fields need to be added to support revisions above rev5 */
+ assert(f->rev == 5);
+
+build_hdr:
+ build_header(linker, tbl, (void *)(tbl->data + fadt_start),
+ "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id);
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index f7fa795278..c7c6a57ec5 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -651,42 +651,33 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
/* FADT */
-static void build_fadt(GArray *table_data, BIOSLinker *linker,
- VirtMachineState *vms, unsigned dsdt_tbl_offset)
+static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker,
+ VirtMachineState *vms, unsigned dsdt_tbl_offset)
{
- int fadt_start = table_data->len;
- AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt));
- unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data;
- uint16_t bootflags;
+ /* ACPI v5.1 */
+ AcpiFadtData fadt = {
+ .rev = 5,
+ .minor_ver = 1,
+ .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
+ .xdsdt_tbl_offset = &dsdt_tbl_offset,
+ };
switch (vms->psci_conduit) {
case QEMU_PSCI_CONDUIT_DISABLED:
- bootflags = 0;
+ fadt.arm_boot_arch = 0;
break;
case QEMU_PSCI_CONDUIT_HVC:
- bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT | ACPI_FADT_ARM_PSCI_USE_HVC;
+ fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT |
+ ACPI_FADT_ARM_PSCI_USE_HVC;
break;
case QEMU_PSCI_CONDUIT_SMC:
- bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT;
+ fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT;
break;
default:
g_assert_not_reached();
}
- /* Hardware Reduced = 1 and use PSCI 0.2+ */
- fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI);
- fadt->arm_boot_flags = cpu_to_le16(bootflags);
-
- /* ACPI v5.1 (fadt->revision.fadt->minor_revision) */
- fadt->minor_revision = 0x1;
-
- /* DSDT address to be filled by Guest linker */
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt),
- ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
-
- build_header(linker, table_data, (void *)(table_data->data + fadt_start),
- "FACP", table_data->len - fadt_start, 5, NULL, NULL);
+ build_fadt(table_data, linker, &fadt, NULL, NULL);
}
/* DSDT */
@@ -761,7 +752,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
/* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
acpi_add_table(table_offsets, tables_blob);
- build_fadt(tables_blob, tables->linker, vms, dsdt);
+ build_fadt_rev5(tables_blob, tables->linker, vms, dsdt);
acpi_add_table(table_offsets, tables_blob);
build_madt(tables_blob, tables->linker, vms);
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a66fb2dcd2..3cf2a1679c 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -91,17 +91,11 @@ typedef struct AcpiMcfgInfo {
} AcpiMcfgInfo;
typedef struct AcpiPmInfo {
- bool force_rev1_fadt;
bool s3_disabled;
bool s4_disabled;
bool pcihp_bridge_en;
uint8_t s4_val;
- uint16_t sci_int;
- uint8_t acpi_enable_cmd;
- uint8_t acpi_disable_cmd;
- uint32_t gpe0_blk;
- uint32_t gpe0_blk_len;
- uint32_t io_base;
+ AcpiFadtData fadt;
uint16_t cpu_hp_io_base;
uint16_t pcihp_io_base;
uint16_t pcihp_io_len;
@@ -124,21 +118,59 @@ typedef struct AcpiBuildPciBusHotplugState {
bool pcihp_bridge_en;
} AcpiBuildPciBusHotplugState;
+static void init_common_fadt_data(Object *o, AcpiFadtData *data)
+{
+ uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
+ AmlAddressSpace as = AML_AS_SYSTEM_IO;
+ AcpiFadtData fadt = {
+ .rev = 3,
+ .flags =
+ (1 << ACPI_FADT_F_WBINVD) |
+ (1 << ACPI_FADT_F_PROC_C1) |
+ (1 << ACPI_FADT_F_SLP_BUTTON) |
+ (1 << ACPI_FADT_F_RTC_S4) |
+ (1 << ACPI_FADT_F_USE_PLATFORM_CLOCK) |
+ /* APIC destination mode ("Flat Logical") has an upper limit of 8
+ * CPUs for more than 8 CPUs, "Clustered Logical" mode has to be
+ * used
+ */
+ ((max_cpus > 8) ? (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0),
+ .int_model = 1 /* Multiple APIC */,
+ .rtc_century = RTC_CENTURY,
+ .plvl2_lat = 0xfff /* C2 state not supported */,
+ .plvl3_lat = 0xfff /* C3 state not supported */,
+ .smi_cmd = ACPI_PORT_SMI_CMD,
+ .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL),
+ .acpi_enable_cmd =
+ object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL),
+ .acpi_disable_cmd =
+ object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL),
+ .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io },
+ .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8,
+ .address = io + 0x04 },
+ .pm_tmr = { .space_id = as, .bit_width = 4 * 8, .address = io + 0x08 },
+ .gpe0_blk = { .space_id = as, .bit_width =
+ object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK_LEN, NULL) * 8,
+ .address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL)
+ },
+ };
+ *data = fadt;
+}
+
static void acpi_get_pm_info(AcpiPmInfo *pm)
{
Object *piix = piix4_pm_find();
Object *lpc = ich9_lpc_find();
- Object *obj = NULL;
+ Object *obj = piix ? piix : lpc;
QObject *o;
-
- pm->force_rev1_fadt = false;
pm->cpu_hp_io_base = 0;
pm->pcihp_io_base = 0;
pm->pcihp_io_len = 0;
+
+ init_common_fadt_data(obj, &pm->fadt);
if (piix) {
/* w2k requires FADT(rev1) or it won't boot, keep PC compatible */
- pm->force_rev1_fadt = true;
- obj = piix;
+ pm->fadt.rev = 1;
pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE;
pm->pcihp_io_base =
object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL);
@@ -146,11 +178,19 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL);
}
if (lpc) {
- obj = lpc;
+ struct AcpiGenericAddress r = { .space_id = AML_AS_SYSTEM_IO,
+ .bit_width = 8, .address = ICH9_RST_CNT_IOPORT };
+ pm->fadt.reset_reg = r;
+ pm->fadt.reset_val = 0xf;
+ pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP;
pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE;
}
assert(obj);
+ /* The above need not be conditional on machine type because the reset port
+ * happens to be the same on PIIX (pc) and ICH9 (q35). */
+ QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT);
+
/* Fill in optional s3/s4 related properties */
o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL);
if (o) {
@@ -174,22 +214,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm)
}
qobject_decref(o);
- /* Fill in mandatory properties */
- pm->sci_int = object_property_get_uint(obj, ACPI_PM_PROP_SCI_INT, NULL);
-
- pm->acpi_enable_cmd = object_property_get_uint(obj,
- ACPI_PM_PROP_ACPI_ENABLE_CMD,
- NULL);
- pm->acpi_disable_cmd =
- object_property_get_uint(obj,
- ACPI_PM_PROP_ACPI_DISABLE_CMD,
- NULL);
- pm->io_base = object_property_get_uint(obj, ACPI_PM_PROP_PM_IO_BASE,
- NULL);
- pm->gpe0_blk = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK,
- NULL);
- pm->gpe0_blk_len = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK_LEN,
- NULL);
pm->pcihp_bridge_en =
object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support",
NULL);
@@ -257,8 +281,6 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64)
NULL));
}
-#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */
-
static void acpi_align_size(GArray *blob, unsigned align)
{
/* Align size to multiple of given size. This reduces the chance
@@ -276,106 +298,6 @@ build_facs(GArray *table_data, BIOSLinker *linker)
facs->length = cpu_to_le32(sizeof(*facs));
}
-/* Load chipset information in FADT */
-static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiPmInfo *pm)
-{
- fadt->model = 1;
- fadt->reserved1 = 0;
- fadt->sci_int = cpu_to_le16(pm->sci_int);
- fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD);
- fadt->acpi_enable = pm->acpi_enable_cmd;
- fadt->acpi_disable = pm->acpi_disable_cmd;
- /* EVT, CNT, TMR offset matches hw/acpi/core.c */
- fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base);
- fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04);
- fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08);
- fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk);
- /* EVT, CNT, TMR length matches hw/acpi/core.c */
- fadt->pm1_evt_len = 4;
- fadt->pm1_cnt_len = 2;
- fadt->pm_tmr_len = 4;
- fadt->gpe0_blk_len = pm->gpe0_blk_len;
- fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */
- fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */
- fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) |
- (1 << ACPI_FADT_F_PROC_C1) |
- (1 << ACPI_FADT_F_SLP_BUTTON) |
- (1 << ACPI_FADT_F_RTC_S4));
- fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK);
- /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs
- * For more than 8 CPUs, "Clustered Logical" mode has to be used
- */
- if (max_cpus > 8) {
- fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL);
- }
- fadt->century = RTC_CENTURY;
- if (pm->force_rev1_fadt) {
- return;
- }
-
- fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_RESET_REG_SUP);
- fadt->reset_value = 0xf;
- fadt->reset_register.space_id = AML_SYSTEM_IO;
- fadt->reset_register.bit_width = 8;
- fadt->reset_register.address = cpu_to_le64(ICH9_RST_CNT_IOPORT);
- /* The above need not be conditional on machine type because the reset port
- * happens to be the same on PIIX (pc) and ICH9 (q35). */
- QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT);
-
- fadt->xpm1a_event_block.space_id = AML_SYSTEM_IO;
- fadt->xpm1a_event_block.bit_width = fadt->pm1_evt_len * 8;
- fadt->xpm1a_event_block.address = cpu_to_le64(pm->io_base);
-
- fadt->xpm1a_control_block.space_id = AML_SYSTEM_IO;
- fadt->xpm1a_control_block.bit_width = fadt->pm1_cnt_len * 8;
- fadt->xpm1a_control_block.address = cpu_to_le64(pm->io_base + 0x4);
-
- fadt->xpm_timer_block.space_id = AML_SYSTEM_IO;
- fadt->xpm_timer_block.bit_width = fadt->pm_tmr_len * 8;
- fadt->xpm_timer_block.address = cpu_to_le64(pm->io_base + 0x8);
-
- fadt->xgpe0_block.space_id = AML_SYSTEM_IO;
- fadt->xgpe0_block.bit_width = pm->gpe0_blk_len * 8;
- fadt->xgpe0_block.address = cpu_to_le64(pm->gpe0_blk);
-}
-
-
-/* FADT */
-static void
-build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm,
- unsigned facs_tbl_offset, unsigned dsdt_tbl_offset,
- const char *oem_id, const char *oem_table_id)
-{
- AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt));
- unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data;
- unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data;
- unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data;
- int fadt_size = sizeof(*fadt);
- int rev = 3;
-
- /* FACS address to be filled by Guest linker */
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl),
- ACPI_BUILD_TABLE_FILE, facs_tbl_offset);
-
- /* DSDT address to be filled by Guest linker */
- fadt_setup(fadt, pm);
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt),
- ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
- if (pm->force_rev1_fadt) {
- rev = 1;
- fadt_size = offsetof(typeof(*fadt), reset_register);
- } else {
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt),
- ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset);
- }
-
- build_header(linker, table_data,
- (void *)fadt, "FACP", fadt_size, rev, oem_id, oem_table_id);
-}
-
void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
const CPUArchIdList *apic_ids, GArray *entry)
{
@@ -2053,7 +1975,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
crs = aml_resource_template();
aml_append(crs,
- aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len)
+ aml_io(
+ AML_DECODE16,
+ pm->fadt.gpe0_blk.address,
+ pm->fadt.gpe0_blk.address,
+ 1,
+ pm->fadt.gpe0_blk.bit_width / 8)
);
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
@@ -2323,6 +2250,55 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
#define HOLE_640K_START (640 * 1024)
#define HOLE_640K_END (1024 * 1024)
+static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
+ uint64_t len, int default_node)
+{
+ MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list();
+ MemoryDeviceInfoList *info;
+ MemoryDeviceInfo *mi;
+ PCDIMMDeviceInfo *di;
+ uint64_t end = base + len, cur, size;
+ bool is_nvdimm;
+ AcpiSratMemoryAffinity *numamem;
+ MemoryAffinityFlags flags;
+
+ for (cur = base, info = info_list;
+ cur < end;
+ cur += size, info = info->next) {
+ numamem = acpi_data_push(table_data, sizeof *numamem);
+
+ if (!info) {
+ build_srat_memory(numamem, cur, end - cur, default_node,
+ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+ break;
+ }
+
+ mi = info->value;
+ is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
+ di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
+
+ if (cur < di->addr) {
+ build_srat_memory(numamem, cur, di->addr - cur, default_node,
+ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+ numamem = acpi_data_push(table_data, sizeof *numamem);
+ }
+
+ size = di->size;
+
+ flags = MEM_AFFINITY_ENABLED;
+ if (di->hotpluggable) {
+ flags |= MEM_AFFINITY_HOTPLUGGABLE;
+ }
+ if (is_nvdimm) {
+ flags |= MEM_AFFINITY_NON_VOLATILE;
+ }
+
+ build_srat_memory(numamem, di->addr, size, di->node, flags);
+ }
+
+ qapi_free_MemoryDeviceInfoList(info_list);
+}
+
static void
build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
{
@@ -2434,10 +2410,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
* providing _PXM method if necessary.
*/
if (hotplugabble_address_space_size) {
- numamem = acpi_data_push(table_data, sizeof *numamem);
- build_srat_memory(numamem, pcms->hotplug_memory.base,
- hotplugabble_address_space_size, pcms->numa_nodes - 1,
- MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+ build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base,
+ hotplugabble_address_space_size,
+ pcms->numa_nodes - 1);
}
build_header(linker, table_data,
@@ -2700,7 +2675,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
/* ACPI tables pointed to by RSDT */
fadt = tables_blob->len;
acpi_add_table(table_offsets, tables_blob);
- build_fadt(tables_blob, tables->linker, &pm, facs, dsdt,
+ pm.fadt.facs_tbl_offset = &facs;
+ pm.fadt.dsdt_tbl_offset = &dsdt;
+ pm.fadt.xdsdt_tbl_offset = &dsdt;
+ build_fadt(tables_blob, tables->linker, &pm.fadt,
slic_oem.id, slic_oem.table_id);
aml_len += tables_blob->len - fadt;
diff --git a/hw/isa/apm.c b/hw/isa/apm.c
index e232b0da03..c3101ef52f 100644
--- a/hw/isa/apm.c
+++ b/hw/isa/apm.c
@@ -34,7 +34,6 @@
#endif
/* fixed I/O location */
-#define APM_CNT_IOPORT 0xb2
#define APM_STS_IOPORT 0xb3
static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 6e74b61cb6..51350d9c2d 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -20,6 +20,7 @@
#include "qemu/osdep.h"
#include "hw/mem/pc-dimm.h"
+#include "hw/mem/nvdimm.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qapi/visitor.h"
@@ -162,45 +163,6 @@ uint64_t get_plugged_memory_size(void)
return pc_existing_dimms_capacity(&error_abort);
}
-int qmp_pc_dimm_device_list(Object *obj, void *opaque)
-{
- MemoryDeviceInfoList ***prev = opaque;
-
- if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
- DeviceState *dev = DEVICE(obj);
-
- if (dev->realized) {
- MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
- MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
- PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
- DeviceClass *dc = DEVICE_GET_CLASS(obj);
- PCDIMMDevice *dimm = PC_DIMM(obj);
-
- if (dev->id) {
- di->has_id = true;
- di->id = g_strdup(dev->id);
- }
- di->hotplugged = dev->hotplugged;
- di->hotpluggable = dc->hotpluggable;
- di->addr = dimm->addr;
- di->slot = dimm->slot;
- di->node = dimm->node;
- di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
- NULL);
- di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
-
- info->u.dimm.data = di;
- elem->value = info;
- elem->next = NULL;
- **prev = elem;
- *prev = &elem->next;
- }
- }
-
- object_child_foreach(obj, qmp_pc_dimm_device_list, opaque);
- return 0;
-}
-
static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
{
unsigned long *bitmap = opaque;
@@ -276,6 +238,57 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
return 0;
}
+MemoryDeviceInfoList *qmp_pc_dimm_device_list(void)
+{
+ GSList *dimms = NULL, *item;
+ MemoryDeviceInfoList *list = NULL, *prev = NULL;
+
+ object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &dimms);
+
+ for (item = dimms; item; item = g_slist_next(item)) {
+ PCDIMMDevice *dimm = PC_DIMM(item->data);
+ Object *obj = OBJECT(dimm);
+ MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
+ MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
+ PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
+ bool is_nvdimm = object_dynamic_cast(obj, TYPE_NVDIMM);
+ DeviceClass *dc = DEVICE_GET_CLASS(obj);
+ DeviceState *dev = DEVICE(obj);
+
+ if (dev->id) {
+ di->has_id = true;
+ di->id = g_strdup(dev->id);
+ }
+ di->hotplugged = dev->hotplugged;
+ di->hotpluggable = dc->hotpluggable;
+ di->addr = dimm->addr;
+ di->slot = dimm->slot;
+ di->node = dimm->node;
+ di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL);
+ di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
+
+ if (!is_nvdimm) {
+ info->u.dimm.data = di;
+ info->type = MEMORY_DEVICE_INFO_KIND_DIMM;
+ } else {
+ info->u.nvdimm.data = di;
+ info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM;
+ }
+ elem->value = info;
+ elem->next = NULL;
+ if (prev) {
+ prev->next = elem;
+ } else {
+ list = elem;
+ }
+ prev = elem;
+ }
+
+ g_slist_free(dimms);
+
+ return list;
+}
+
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
uint64_t *hint, uint64_t align, uint64_t size,
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 188744e17d..67ad38cfe4 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -26,6 +26,7 @@
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
+#include "standard-headers/linux/ethtool.h"
#define VIRTIO_NET_VM_VERSION 11
@@ -48,19 +49,21 @@
(offsetof(container, field) + sizeof(((container *)0)->field))
typedef struct VirtIOFeature {
- uint32_t flags;
+ uint64_t flags;
size_t end;
} VirtIOFeature;
static VirtIOFeature feature_sizes[] = {
- {.flags = 1 << VIRTIO_NET_F_MAC,
+ {.flags = 1ULL << VIRTIO_NET_F_MAC,
.end = endof(struct virtio_net_config, mac)},
- {.flags = 1 << VIRTIO_NET_F_STATUS,
+ {.flags = 1ULL << VIRTIO_NET_F_STATUS,
.end = endof(struct virtio_net_config, status)},
- {.flags = 1 << VIRTIO_NET_F_MQ,
+ {.flags = 1ULL << VIRTIO_NET_F_MQ,
.end = endof(struct virtio_net_config, max_virtqueue_pairs)},
- {.flags = 1 << VIRTIO_NET_F_MTU,
+ {.flags = 1ULL << VIRTIO_NET_F_MTU,
.end = endof(struct virtio_net_config, mtu)},
+ {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
+ .end = endof(struct virtio_net_config, duplex)},
{}
};
@@ -89,6 +92,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
memcpy(netcfg.mac, n->mac, ETH_ALEN);
+ virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
+ netcfg.duplex = n->net_conf.duplex;
memcpy(config, &netcfg, n->config_size);
}
@@ -1938,7 +1943,26 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
int i;
if (n->net_conf.mtu) {
- n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
+ n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
+ }
+
+ if (n->net_conf.duplex_str) {
+ if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
+ n->net_conf.duplex = DUPLEX_HALF;
+ } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
+ n->net_conf.duplex = DUPLEX_FULL;
+ } else {
+ error_setg(errp, "'duplex' must be 'half' or 'full'");
+ }
+ n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
+ } else {
+ n->net_conf.duplex = DUPLEX_UNKNOWN;
+ }
+
+ if (n->net_conf.speed < SPEED_UNKNOWN) {
+ error_setg(errp, "'speed' must be between 0 and INT_MAX");
+ } else if (n->net_conf.speed >= 0) {
+ n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
virtio_net_set_config_size(n, n->host_features);
@@ -2109,45 +2133,46 @@ static const VMStateDescription vmstate_virtio_net = {
};
static Property virtio_net_properties[] = {
- DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
- DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
+ VIRTIO_NET_F_CSUM, true),
+ DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_CSUM, true),
- DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
- DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
+ DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO4, true),
- DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO6, true),
- DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ECN, true),
- DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_UFO, true),
- DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ANNOUNCE, true),
- DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO4, true),
- DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO6, true),
- DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
VIRTIO_NET_F_HOST_ECN, true),
- DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
VIRTIO_NET_F_HOST_UFO, true),
- DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
VIRTIO_NET_F_MRG_RXBUF, true),
- DEFINE_PROP_BIT("status", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("status", VirtIONet, host_features,
VIRTIO_NET_F_STATUS, true),
- DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VQ, true),
- DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX, true),
- DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VLAN, true),
- DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX_EXTRA, true),
- DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_MAC_ADDR, true),
- DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
- DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
+ DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
TX_TIMER_INTERVAL),
@@ -2160,6 +2185,8 @@ static Property virtio_net_properties[] = {
DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
true),
+ DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
+ DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 67a3f72bd6..80bc45930d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2048,18 +2048,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
-static void pci_default_realize(PCIDevice *dev, Error **errp)
-{
- PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
-
- if (pc->init) {
- if (pc->init(dev) < 0) {
- error_setg(errp, "Device initialization failed");
- return;
- }
- }
-}
-
PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction,
const char *name)
{
@@ -2532,13 +2520,11 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev)
static void pci_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *k = DEVICE_CLASS(klass);
- PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
k->realize = pci_qdev_realize;
k->unrealize = pci_qdev_unrealize;
k->bus_type = TYPE_PCI_BUS;
k->props = pci_props;
- pc->realize = pci_default_realize;
}
static void pci_device_class_base_init(ObjectClass *klass, void *data)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fba76abee2..2c0be8c898 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -722,8 +722,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
}
if (hotplug_lmb_start) {
- MemoryDeviceInfoList **prev = &dimms;
- qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
+ dimms = qmp_pc_dimm_device_list();
}
/* ibm,dynamic-memory */
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 742ff0f90b..1422ff03ab 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -3,9 +3,23 @@
# hw/virtio/vhost.c
vhost_commit(bool started, bool changed) "Started: %d Changed: %d"
vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
-vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64
+vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64
+vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
vhost_section(const char *name, int r) "%s:%d"
+# hw/virtio/vhost-user.c
+vhost_user_postcopy_end_entry(void) ""
+vhost_user_postcopy_end_exit(void) ""
+vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d"
+vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64
+vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64
+vhost_user_postcopy_listen(void) ""
+vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d"
+vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64
+vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
+vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
+vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
+
# hw/virtio/virtio.c
virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 41ff5cff41..44aea5c0a8 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -18,11 +18,15 @@
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
+#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
+#include "trace.h"
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>
+#include <linux/userfaultfd.h>
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
@@ -41,7 +45,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
-
+ VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -76,6 +80,9 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_CONFIG = 25,
VHOST_USER_CREATE_CRYPTO_SESSION = 26,
VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
+ VHOST_USER_POSTCOPY_ADVISE = 28,
+ VHOST_USER_POSTCOPY_LISTEN = 29,
+ VHOST_USER_POSTCOPY_END = 30,
VHOST_USER_MAX
} VhostUserRequest;
@@ -164,8 +171,23 @@ static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_VERSION (0x1)
struct vhost_user {
+ struct vhost_dev *dev;
CharBackend *chr;
int slave_fd;
+ NotifierWithReturn postcopy_notifier;
+ struct PostCopyFD postcopy_fd;
+ uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
+ /* Length of the region_rb and region_rb_offset arrays */
+ size_t region_rb_len;
+ /* RAMBlock associated with a given region */
+ RAMBlock **region_rb;
+ /* The offset from the start of the RAMBlock to the start of the
+ * vhost region.
+ */
+ ram_addr_t *region_rb_offset;
+
+ /* True once we've entered postcopy_listen */
+ bool postcopy_listen;
};
static bool ioeventfd_enabled(void)
@@ -330,14 +352,167 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
return 0;
}
+static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
+ struct vhost_memory *mem)
+{
+ struct vhost_user *u = dev->opaque;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int i, fd;
+ size_t fd_num = 0;
+ bool reply_supported = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_REPLY_ACK);
+ VhostUserMsg msg_reply;
+ int region_i, msg_i;
+
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_SET_MEM_TABLE,
+ .hdr.flags = VHOST_USER_VERSION,
+ };
+
+ if (reply_supported) {
+ msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+ }
+
+ if (u->region_rb_len < dev->mem->nregions) {
+ u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
+ u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
+ dev->mem->nregions);
+ memset(&(u->region_rb[u->region_rb_len]), '\0',
+ sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
+ memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
+ sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
+ u->region_rb_len = dev->mem->nregions;
+ }
+
+ for (i = 0; i < dev->mem->nregions; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ ram_addr_t offset;
+ MemoryRegion *mr;
+
+ assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
+ mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
+ &offset);
+ fd = memory_region_get_fd(mr);
+ if (fd > 0) {
+ trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
+ reg->memory_size,
+ reg->guest_phys_addr,
+ reg->userspace_addr, offset);
+ u->region_rb_offset[i] = offset;
+ u->region_rb[i] = mr->ram_block;
+ msg.payload.memory.regions[fd_num].userspace_addr =
+ reg->userspace_addr;
+ msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
+ msg.payload.memory.regions[fd_num].guest_phys_addr =
+ reg->guest_phys_addr;
+ msg.payload.memory.regions[fd_num].mmap_offset = offset;
+ assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
+ fds[fd_num++] = fd;
+ } else {
+ u->region_rb_offset[i] = 0;
+ u->region_rb[i] = NULL;
+ }
+ }
+
+ msg.payload.memory.nregions = fd_num;
+
+ if (!fd_num) {
+ error_report("Failed initializing vhost-user memory map, "
+ "consider using -object memory-backend-file share=on");
+ return -1;
+ }
+
+ msg.hdr.size = sizeof(msg.payload.memory.nregions);
+ msg.hdr.size += sizeof(msg.payload.memory.padding);
+ msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
+
+ if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
+ return -1;
+ }
+
+ if (vhost_user_read(dev, &msg_reply) < 0) {
+ return -1;
+ }
+
+ if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
+ error_report("%s: Received unexpected msg type."
+ "Expected %d received %d", __func__,
+ VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
+ return -1;
+ }
+ /* We're using the same structure, just reusing one of the
+ * fields, so it should be the same size.
+ */
+ if (msg_reply.hdr.size != msg.hdr.size) {
+ error_report("%s: Unexpected size for postcopy reply "
+ "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
+ return -1;
+ }
+
+ memset(u->postcopy_client_bases, 0,
+ sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
+
+ /* The reply's regions are in the same order as the regions that were
+ * sent, but some of the regions were skipped (above) if they
+ * didn't have fds
+ */
+ for (msg_i = 0, region_i = 0;
+ region_i < dev->mem->nregions;
+ region_i++) {
+ if (msg_i < fd_num &&
+ msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
+ dev->mem->regions[region_i].guest_phys_addr) {
+ u->postcopy_client_bases[region_i] =
+ msg_reply.payload.memory.regions[msg_i].userspace_addr;
+ trace_vhost_user_set_mem_table_postcopy(
+ msg_reply.payload.memory.regions[msg_i].userspace_addr,
+ msg.payload.memory.regions[msg_i].userspace_addr,
+ msg_i, region_i);
+ msg_i++;
+ }
+ }
+ if (msg_i != fd_num) {
+ error_report("%s: postcopy reply not fully consumed "
+ "%d vs %zd",
+ __func__, msg_i, fd_num);
+ return -1;
+ }
+ /* Now we've registered this with the postcopy code, we ack to the client,
+ * because now we're in the position to be able to deal with any faults
+ * it generates.
+ */
+ /* TODO: Use this for failure cases as well with a bad value */
+ msg.hdr.size = sizeof(msg.payload.u64);
+ msg.payload.u64 = 0; /* OK */
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ return -1;
+ }
+
+ if (reply_supported) {
+ return process_message_reply(dev, &msg);
+ }
+
+ return 0;
+}
+
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
+ struct vhost_user *u = dev->opaque;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
+ bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
bool reply_supported = virtio_has_feature(dev->protocol_features,
- VHOST_USER_PROTOCOL_F_REPLY_ACK);
+ VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
+ !do_postcopy;
+
+ if (do_postcopy) {
+ /* Postcopy has enough differences that it's best done in its own
+ * version
+ */
+ return vhost_user_set_mem_table_postcopy(dev, mem);
+ }
VhostUserMsg msg = {
.hdr.request = VHOST_USER_SET_MEM_TABLE,
@@ -362,9 +537,11 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
error_report("Failed preparing vhost-user memory table msg");
return -1;
}
- msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
+ msg.payload.memory.regions[fd_num].userspace_addr =
+ reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
- msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
+ msg.payload.memory.regions[fd_num].guest_phys_addr =
+ reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
fds[fd_num++] = fd;
}
@@ -791,6 +968,219 @@ out:
return ret;
}
+/*
+ * Called back from the postcopy fault thread when a fault is received on our
+ * ufd.
+ * TODO: This is Linux specific
+ */
+static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
+ void *ufd)
+{
+ struct vhost_dev *dev = pcfd->data;
+ struct vhost_user *u = dev->opaque;
+ struct uffd_msg *msg = ufd;
+ uint64_t faultaddr = msg->arg.pagefault.address;
+ RAMBlock *rb = NULL;
+ uint64_t rb_offset;
+ int i;
+
+ trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
+ dev->mem->nregions);
+ for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
+ trace_vhost_user_postcopy_fault_handler_loop(i,
+ u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
+ if (faultaddr >= u->postcopy_client_bases[i]) {
+ /* Offset of the fault address in the vhost region */
+ uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
+ if (region_offset < dev->mem->regions[i].memory_size) {
+ rb_offset = region_offset + u->region_rb_offset[i];
+ trace_vhost_user_postcopy_fault_handler_found(i,
+ region_offset, rb_offset);
+ rb = u->region_rb[i];
+ return postcopy_request_shared_page(pcfd, rb, faultaddr,
+ rb_offset);
+ }
+ }
+ }
+ error_report("%s: Failed to find region for fault %" PRIx64,
+ __func__, faultaddr);
+ return -1;
+}
+
+static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
+ uint64_t offset)
+{
+ struct vhost_dev *dev = pcfd->data;
+ struct vhost_user *u = dev->opaque;
+ int i;
+
+ trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
+
+ if (!u) {
+ return 0;
+ }
+ /* Translate the offset into an address in the client's address space */
+ for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
+ if (u->region_rb[i] == rb &&
+ offset >= u->region_rb_offset[i] &&
+ offset < (u->region_rb_offset[i] +
+ dev->mem->regions[i].memory_size)) {
+ uint64_t client_addr = (offset - u->region_rb_offset[i]) +
+ u->postcopy_client_bases[i];
+ trace_vhost_user_postcopy_waker_found(client_addr);
+ return postcopy_wake_shared(pcfd, client_addr, rb);
+ }
+ }
+
+ trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
+ return 0;
+}
+
+/*
+ * Called at the start of an inbound postcopy on reception of the
+ * 'advise' command.
+ */
+static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
+{
+ struct vhost_user *u = dev->opaque;
+ CharBackend *chr = u->chr;
+ int ufd;
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
+ .hdr.flags = VHOST_USER_VERSION,
+ };
+
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ error_setg(errp, "Failed to send postcopy_advise to vhost");
+ return -1;
+ }
+
+ if (vhost_user_read(dev, &msg) < 0) {
+ error_setg(errp, "Failed to get postcopy_advise reply from vhost");
+ return -1;
+ }
+
+ if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
+ error_setg(errp, "Unexpected msg type. Expected %d received %d",
+ VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
+ return -1;
+ }
+
+ if (msg.hdr.size) {
+ error_setg(errp, "Received bad msg size.");
+ return -1;
+ }
+ ufd = qemu_chr_fe_get_msgfd(chr);
+ if (ufd < 0) {
+ error_setg(errp, "%s: Failed to get ufd", __func__);
+ return -1;
+ }
+ fcntl(ufd, F_SETFL, O_NONBLOCK);
+
+ /* register ufd with userfault thread */
+ u->postcopy_fd.fd = ufd;
+ u->postcopy_fd.data = dev;
+ u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
+ u->postcopy_fd.waker = vhost_user_postcopy_waker;
+ u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
+ postcopy_register_shared_ufd(&u->postcopy_fd);
+ return 0;
+}
+
+/*
+ * Called at the switch to postcopy on reception of the 'listen' command.
+ */
+static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
+{
+ struct vhost_user *u = dev->opaque;
+ int ret;
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
+ .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+ };
+ u->postcopy_listen = true;
+ trace_vhost_user_postcopy_listen();
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ error_setg(errp, "Failed to send postcopy_listen to vhost");
+ return -1;
+ }
+
+ ret = process_message_reply(dev, &msg);
+ if (ret) {
+ error_setg(errp, "Failed to receive reply to postcopy_listen");
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Called at the end of postcopy
+ */
+static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
+{
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_POSTCOPY_END,
+ .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+ };
+ int ret;
+ struct vhost_user *u = dev->opaque;
+
+ trace_vhost_user_postcopy_end_entry();
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ error_setg(errp, "Failed to send postcopy_end to vhost");
+ return -1;
+ }
+
+ ret = process_message_reply(dev, &msg);
+ if (ret) {
+ error_setg(errp, "Failed to receive reply to postcopy_end");
+ return ret;
+ }
+ postcopy_unregister_shared_ufd(&u->postcopy_fd);
+ u->postcopy_fd.handler = NULL;
+
+ trace_vhost_user_postcopy_end_exit();
+
+ return 0;
+}
+
+static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
+ void *opaque)
+{
+ struct PostcopyNotifyData *pnd = opaque;
+ struct vhost_user *u = container_of(notifier, struct vhost_user,
+ postcopy_notifier);
+ struct vhost_dev *dev = u->dev;
+
+ switch (pnd->reason) {
+ case POSTCOPY_NOTIFY_PROBE:
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
+ /* TODO: Get the device name into this error somehow */
+ error_setg(pnd->errp,
+ "vhost-user backend not capable of postcopy");
+ return -ENOENT;
+ }
+ break;
+
+ case POSTCOPY_NOTIFY_INBOUND_ADVISE:
+ return vhost_user_postcopy_advise(dev, pnd->errp);
+
+ case POSTCOPY_NOTIFY_INBOUND_LISTEN:
+ return vhost_user_postcopy_listen(dev, pnd->errp);
+
+ case POSTCOPY_NOTIFY_INBOUND_END:
+ return vhost_user_postcopy_end(dev, pnd->errp);
+
+ default:
+ /* We ignore notifications we don't know */
+ break;
+ }
+
+ return 0;
+}
+
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
uint64_t features, protocol_features;
@@ -802,6 +1192,7 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
u = g_new0(struct vhost_user, 1);
u->chr = opaque;
u->slave_fd = -1;
+ u->dev = dev;
dev->opaque = u;
err = vhost_user_get_features(dev, &features);
@@ -858,6 +1249,9 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
return err;
}
+ u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
+ postcopy_add_notifier(&u->postcopy_notifier);
+
return 0;
}
@@ -868,11 +1262,20 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
u = dev->opaque;
+ if (u->postcopy_notifier.notify) {
+ postcopy_remove_notifier(&u->postcopy_notifier);
+ u->postcopy_notifier.notify = NULL;
+ }
if (u->slave_fd >= 0) {
qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
close(u->slave_fd);
u->slave_fd = -1;
}
+ g_free(u->region_rb);
+ u->region_rb = NULL;
+ g_free(u->region_rb_offset);
+ u->region_rb_offset = NULL;
+ u->region_rb_len = 0;
g_free(u);
dev->opaque = 0;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index d8d0ef92e1..250f886acb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -522,10 +522,28 @@ static void vhost_region_add_section(struct vhost_dev *dev,
uint64_t mrs_gpa = section->offset_within_address_space;
uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
+ RAMBlock *mrs_rb = section->mr->ram_block;
+ size_t mrs_page = qemu_ram_pagesize(mrs_rb);
trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
mrs_host);
+ /* Round the section to its page size */
+ /* First align the start down to a page boundary */
+ uint64_t alignage = mrs_host & (mrs_page - 1);
+ if (alignage) {
+ mrs_host -= alignage;
+ mrs_size += alignage;
+ mrs_gpa -= alignage;
+ }
+ /* Now align the size up to a page boundary */
+ alignage = mrs_size & (mrs_page - 1);
+ if (alignage) {
+ mrs_size += mrs_page - alignage;
+ }
+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size,
+ mrs_host);
+
if (dev->n_tmp_sections) {
/* Since we already have at least one section, lets see if
* this extends it; since we're scanning in order, we only
@@ -542,18 +560,46 @@ static void vhost_region_add_section(struct vhost_dev *dev,
prev_sec->offset_within_region;
uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
- if (prev_gpa_end + 1 == mrs_gpa &&
- prev_host_end + 1 == mrs_host &&
- section->mr == prev_sec->mr &&
- (!dev->vhost_ops->vhost_backend_can_merge ||
- dev->vhost_ops->vhost_backend_can_merge(dev,
+ if (mrs_gpa <= (prev_gpa_end + 1)) {
+ /* OK, looks like overlapping/intersecting - it's possible that
+ * the rounding to page sizes has made them overlap, but they should
+ * match up in the same RAMBlock if they do.
+ */
+ if (mrs_gpa < prev_gpa_start) {
+ error_report("%s:Section rounded to %"PRIx64
+ " prior to previous %"PRIx64,
+ __func__, mrs_gpa, prev_gpa_start);
+ /* A way to cleanly fail here would be better */
+ return;
+ }
+ /* Offset from the start of the previous GPA to this GPA */
+ size_t offset = mrs_gpa - prev_gpa_start;
+
+ if (prev_host_start + offset == mrs_host &&
+ section->mr == prev_sec->mr &&
+ (!dev->vhost_ops->vhost_backend_can_merge ||
+ dev->vhost_ops->vhost_backend_can_merge(dev,
mrs_host, mrs_size,
prev_host_start, prev_size))) {
- /* The two sections abut */
- need_add = false;
- prev_sec->size = int128_add(prev_sec->size, section->size);
- trace_vhost_region_add_section_abut(section->mr->name,
- mrs_size + prev_size);
+ uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
+ need_add = false;
+ prev_sec->offset_within_address_space =
+ MIN(prev_gpa_start, mrs_gpa);
+ prev_sec->offset_within_region =
+ MIN(prev_host_start, mrs_host) -
+ (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
+ prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
+ mrs_host));
+ trace_vhost_region_add_section_merge(section->mr->name,
+ int128_get64(prev_sec->size),
+ prev_sec->offset_within_address_space,
+ prev_sec->offset_within_region);
+ } else {
+ error_report("%s: Overlapping but not coherent sections "
+ "at %"PRIx64,
+ __func__, mrs_gpa);
+ return;
+ }
}
}