Diffstat (limited to 'hw')
82 files changed, 2047 insertions, 660 deletions
diff --git a/hw/acpi/core.c b/hw/acpi/core.c index e890a5d675..95fcac95a2 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -561,7 +561,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) uint16_t sus_typ = (val >> 10) & 7; switch(sus_typ) { case 0: /* soft power off */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case 1: qemu_system_suspend_request(); @@ -569,7 +569,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) default: if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ qapi_event_send_suspend_disk(&error_abort); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; } diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index f50f5cf186..c1cf7802a4 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -664,7 +664,7 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr, /* TODO: A translation failure here ought to set PCI error codes on the Pchip and generate a machine check interrupt. */ static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); IOMMUTLBEntry ret; diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index 0a4508cef3..d209b97dee 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -108,9 +108,9 @@ static void hb_regs_write(void *opaque, hwaddr offset, if (offset == 0xf00) { if (value == 1 || value == 2) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (value == 3) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c index 5610ffc9ce..ca3eca1d16 100644 --- a/hw/arm/integratorcp.c +++ b/hw/arm/integratorcp.c @@ -158,7 +158,7 @@ static void integratorcm_do_remap(IntegratorCMState *s) static void integratorcm_set_ctrl(IntegratorCMState *s, uint32_t value) { if (value & 8) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } if ((s->cm_ctrl ^ value) & 1) { /* (value & 1) != 0 means the green "MISC LED" is lit. diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index cbbca4e17a..9c710f74b4 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -898,7 +898,7 @@ static void mv88w8618_pit_write(void *opaque, hwaddr offset, case MP_BOARD_RESET: if (value == MP_BOARD_RESET_MAGIC) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index b3cf0ec690..54582bd148 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -355,7 +355,7 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr, /* XXX: on T|E hardware somehow this has no effect, * on Zire 71 it works as specified. 
*/ s->reset = 1; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } s->last_wr = value & 0xff; @@ -1545,8 +1545,10 @@ static inline void omap_clkm_idlect1_update(struct omap_mpu_state_s *s, if (value & (1 << 11)) { /* SETARM_IDLE */ cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_HALT); } - if (!(value & (1 << 10))) /* WKUP_MODE */ - qemu_system_shutdown_request(); /* XXX: disable wakeup from IRQ */ + if (!(value & (1 << 10))) { /* WKUP_MODE */ + /* XXX: disable wakeup from IRQ */ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } #define SET_CANIDLE(clock, bit) \ if (diff & (1 << bit)) { \ @@ -1693,7 +1695,7 @@ static void omap_clkm_write(void *opaque, hwaddr addr, diff = s->clkm.arm_rstct1 ^ value; s->clkm.arm_rstct1 = value & 0x0007; if (value & 9) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); s->clkm.cold_start = 0xa; } if (diff & ~value & 4) { /* DSP_RST */ diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index cf1b4ba58f..8afb854c74 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -1610,7 +1610,7 @@ static void omap_prcm_write(void *opaque, hwaddr addr, case 0x450: /* RM_RSTCTRL_WKUP */ /* TODO: reset */ if (value & 2) - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case 0x454: /* RM_RSTTIME_WKUP */ s->rsttime_wkup = value & 0x1fff; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 324626847c..93bde14743 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -848,7 +848,7 @@ static void spitz_lcd_hsync_handler(void *opaque, int line, int level) static void spitz_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index ea7a8094e1..cf6e7be083 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -1197,7 +1197,7 @@ static void do_sys_reset(void *opaque, int n, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 9f58a23fb5..2421b8150d 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -90,7 +90,7 @@ static void tosa_out_switch(void *opaque, int line, int level) static void tosa_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7428db9f0c..381dc7c5fb 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,7 +9,7 @@ */ /** - * Reference Specs: http://www.nvmexpress.org, 1.1, 1.0e + * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e * * http://www.nvmexpress.org/resources/ */ @@ -17,7 +17,11 @@ /** * Usage: add options: * -drive file=<file>,if=none,id=<drive_id> - * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]> + * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ + * cmb_size_mb=<cmb_size_mb[optional]> + * + * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at + * offset 0 in BAR2 and supports SQS only for now. 
*/ #include "qemu/osdep.h" @@ -34,6 +38,16 @@ static void nvme_process_sq(void *opaque); +static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + if (n->cmbsz && addr >= n->ctrl_mem.addr && + addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { + memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); + } else { + pci_dma_read(&n->parent_obj, addr, buf, size); + } +} + static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; @@ -637,7 +651,7 @@ static void nvme_process_sq(void *opaque) while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { addr = sq->dma_addr + sq->head * n->sqe_size; - pci_dma_read(&n->parent_obj, addr, (void *)&cmd, sizeof(cmd)); + nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd)); nvme_inc_sq_head(sq); req = QTAILQ_FIRST(&sq->req_list); @@ -852,6 +866,32 @@ static const MemoryRegionOps nvme_mmio_ops = { }, }; +static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + memcpy(&n->cmbuf[addr], &data, size); +} + +static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val; + NvmeCtrl *n = (NvmeCtrl *)opaque; + + memcpy(&val, &n->cmbuf[addr], size); + return val; +} + +static const MemoryRegionOps nvme_cmb_ops = { + .read = nvme_cmb_read, + .write = nvme_cmb_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 2, + .max_access_size = 8, + }, +}; + static int nvme_init(PCIDevice *pci_dev) { NvmeCtrl *n = NVME(pci_dev); @@ -936,9 +976,31 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010100; + n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; + if (n->cmb_size_mb) { + + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(&n->parent_obj, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + + } + for (i = 0; i < n->num_namespaces; i++) { NvmeNamespace *ns = &n->namespaces[i]; NvmeIdNs *id_ns = &ns->id_ns; @@ -964,12 +1026,17 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->namespaces); g_free(n->cq); g_free(n->sq); + if (n->cmbsz) { + memory_region_unref(&n->ctrl_mem); + } + msix_uninit_exclusive_bar(pci_dev); } static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index a0d15649f9..b4961d2547 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -14,6 +14,8 @@ typedef struct NvmeBar { uint32_t aqa; uint64_t asq; uint64_t acq; + uint32_t cmbloc; + uint32_t cmbsz; } NvmeBar; enum NvmeCapShift { @@ -138,6 +140,72 @@ enum NvmeAqaMask { #define NVME_AQA_ASQS(aqa) ((aqa >> AQA_ASQS_SHIFT) & AQA_ASQS_MASK) #define NVME_AQA_ACQS(aqa) ((aqa >> 
AQA_ACQS_SHIFT) & AQA_ACQS_MASK) +enum NvmeCmblocShift { + CMBLOC_BIR_SHIFT = 0, + CMBLOC_OFST_SHIFT = 12, +}; + +enum NvmeCmblocMask { + CMBLOC_BIR_MASK = 0x7, + CMBLOC_OFST_MASK = 0xfffff, +}; + +#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT) & \ + CMBLOC_BIR_MASK) +#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \ + CMBLOC_OFST_MASK) + +#define NVME_CMBLOC_SET_BIR(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT) +#define NVME_CMBLOC_SET_OFST(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT) + +enum NvmeCmbszShift { + CMBSZ_SQS_SHIFT = 0, + CMBSZ_CQS_SHIFT = 1, + CMBSZ_LISTS_SHIFT = 2, + CMBSZ_RDS_SHIFT = 3, + CMBSZ_WDS_SHIFT = 4, + CMBSZ_SZU_SHIFT = 8, + CMBSZ_SZ_SHIFT = 12, +}; + +enum NvmeCmbszMask { + CMBSZ_SQS_MASK = 0x1, + CMBSZ_CQS_MASK = 0x1, + CMBSZ_LISTS_MASK = 0x1, + CMBSZ_RDS_MASK = 0x1, + CMBSZ_WDS_MASK = 0x1, + CMBSZ_SZU_MASK = 0xf, + CMBSZ_SZ_MASK = 0xfffff, +}; + +#define NVME_CMBSZ_SQS(cmbsz) ((cmbsz >> CMBSZ_SQS_SHIFT) & CMBSZ_SQS_MASK) +#define NVME_CMBSZ_CQS(cmbsz) ((cmbsz >> CMBSZ_CQS_SHIFT) & CMBSZ_CQS_MASK) +#define NVME_CMBSZ_LISTS(cmbsz)((cmbsz >> CMBSZ_LISTS_SHIFT) & CMBSZ_LISTS_MASK) +#define NVME_CMBSZ_RDS(cmbsz) ((cmbsz >> CMBSZ_RDS_SHIFT) & CMBSZ_RDS_MASK) +#define NVME_CMBSZ_WDS(cmbsz) ((cmbsz >> CMBSZ_WDS_SHIFT) & CMBSZ_WDS_MASK) +#define NVME_CMBSZ_SZU(cmbsz) ((cmbsz >> CMBSZ_SZU_SHIFT) & CMBSZ_SZU_MASK) +#define NVME_CMBSZ_SZ(cmbsz) ((cmbsz >> CMBSZ_SZ_SHIFT) & CMBSZ_SZ_MASK) + +#define NVME_CMBSZ_SET_SQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SQS_MASK) << CMBSZ_SQS_SHIFT) +#define NVME_CMBSZ_SET_CQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_CQS_MASK) << CMBSZ_CQS_SHIFT) +#define NVME_CMBSZ_SET_LISTS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_LISTS_MASK) << CMBSZ_LISTS_SHIFT) +#define NVME_CMBSZ_SET_RDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_RDS_MASK) << CMBSZ_RDS_SHIFT) +#define NVME_CMBSZ_SET_WDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_WDS_MASK) << CMBSZ_WDS_SHIFT) +#define NVME_CMBSZ_SET_SZU(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZU_MASK) << CMBSZ_SZU_SHIFT) +#define NVME_CMBSZ_SET_SZ(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZ_MASK) << CMBSZ_SZ_SHIFT) + +#define NVME_CMBSZ_GETSIZE(cmbsz) \ + (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) + typedef struct NvmeCmd { uint8_t opcode; uint8_t fuse; @@ -688,6 +756,7 @@ typedef struct NvmeNamespace { typedef struct NvmeCtrl { PCIDevice parent_obj; MemoryRegion iomem; + MemoryRegion ctrl_mem; NvmeBar bar; BlockConf conf; @@ -701,6 +770,10 @@ typedef struct NvmeCtrl { uint32_t num_queues; uint32_t max_q_ents; uint64_t ns_size; + uint32_t cmb_size_mb; + uint32_t cmbsz; + uint32_t cmbloc; + uint8_t *cmbuf; char *serial; NvmeNamespace *namespaces; diff --git a/hw/core/machine.c b/hw/core/machine.c index fd6a436064..3adebf14c4 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -21,6 +21,7 @@ #include "qemu/error-report.h" #include "qemu/cutils.h" #include "sysemu/numa.h" +#include "sysemu/qtest.h" static char *machine_get_accel(Object *obj, Error **errp) { @@ -722,7 +723,7 @@ static void machine_numa_validate(MachineState *machine) g_free(cpu_str); } } - if (s->len) { + if (s->len && !qtest_enabled()) { error_report("warning: CPU(s) not present in any NUMA nodes: %s", s->str); error_report("warning: All CPU(s) up to maxcpus should be described " diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c index 0080141905..edf9432051 100644 
--- a/hw/dma/rc4030.c +++ b/hw/dma/rc4030.c @@ -489,7 +489,7 @@ static const MemoryRegionOps jazzio_ops = { }; static IOMMUTLBEntry rc4030_dma_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { rc4030State *s = container_of(iommu, rc4030State, dma_mr); IOMMUTLBEntry ret = { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index afcadacd2e..82bd44f38e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2404,14 +2404,17 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } /* - * Entry is required for Windows to enable memory hotplug in OS. + * Entry is required for Windows to enable memory hotplug in OS + * and for Linux to enable SWIOTLB when booted with less than + * 4G of RAM. Windows works better if the entry sets proximity + * to the highest NUMA node in the machine. * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ if (hotplugabble_address_space_size) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, 0, + hotplugabble_address_space_size, pcms->numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 329058dac8..7b6d4ea3f3 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr) } static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); AMDVIState *s = as->iommu_state; @@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, return ret; } - amdvi_do_translate(as, addr, is_write, &ret); + amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, ret.translated_addr); return ret; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9ba2162cd9..15610b9de8 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -512,7 +512,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, return 0; } -static inline bool vtd_context_entry_present(VTDContextEntry *context) +static inline bool vtd_ce_present(VTDContextEntry *context) { return context->lo & VTD_CONTEXT_ENTRY_P; } @@ -533,7 +533,7 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, return 0; } -static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) +static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) { return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } @@ -585,19 +585,49 @@ static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) /* Get the page-table level that hardware should use for the second-level * page-table walk from the Address Width field of context-entry. 
*/ -static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) { return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); } -static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) { return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; } +static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce) +{ + return ce->lo & VTD_CONTEXT_ENTRY_TT; +} + +/* Return true if check passed, otherwise false */ +static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, + VTDContextEntry *ce) +{ + switch (vtd_ce_get_type(ce)) { + case VTD_CONTEXT_TT_MULTI_LEVEL: + /* Always supported */ + break; + case VTD_CONTEXT_TT_DEV_IOTLB: + if (!x86_iommu->dt_supported) { + return false; + } + break; + case VTD_CONTEXT_TT_PASS_THROUGH: + if (!x86_iommu->pt_supported) { + return false; + } + break; + default: + /* Unknwon type */ + return false; + } + return true; +} + static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) { - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + uint32_t ce_agaw = vtd_ce_get_agaw(ce); return 1ULL << MIN(ce_agaw, VTD_MGAW); } @@ -635,6 +665,29 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) } } +/* Find the VTD address space associated with a given bus number */ +static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) +{ + VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; + if (!vtd_bus) { + /* + * Iterate over the registered buses to find the one which + * currently hold this bus number, and update the bus_num + * lookup table: + */ + GHashTableIter iter; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + if (pci_bus_num(vtd_bus->bus) == bus_num) { + s->vtd_as_by_bus_num[bus_num] = vtd_bus; + return vtd_bus; + } + } + } + return vtd_bus; +} + /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. 
*/ @@ -642,8 +695,8 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, bool *reads, bool *writes) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); uint32_t offset; uint64_t slpte; uint64_t access_right_check; @@ -664,7 +717,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " "entry at level %"PRIu32 " for iova 0x%"PRIx64, level, iova); - if (level == vtd_get_level_from_context_entry(ce)) { + if (level == vtd_ce_get_level(ce)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -809,8 +862,8 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, bool notify_unmap) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); if (!vtd_iova_range_check(start, ce)) { return -VTD_FR_ADDR_BEYOND_MGAW; @@ -831,6 +884,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, { VTDRootEntry re; int ret_fr; + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); ret_fr = vtd_get_root_entry(s, bus_num, &re); if (ret_fr) { @@ -841,7 +895,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Not error - it's okay we don't have root entry. */ trace_vtd_re_not_present(bus_num); return -VTD_FR_ROOT_ENTRY_P; - } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + } + + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -851,31 +907,116 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return ret_fr; } - if (!vtd_context_entry_present(ce)) { + if (!vtd_ce_present(ce)) { /* Not error - it's okay we don't have context entry. */ trace_vtd_ce_not_present(bus_num, devfn); return -VTD_FR_CONTEXT_ENTRY_P; - } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + } + + if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } + /* Check if the programming of context-entry is valid */ - if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { + if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + trace_vtd_ce_invalid(ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_INV; + } + + /* Do translation type check */ + if (!vtd_ce_type_check(x86_iommu, ce)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_INV; + } + + return 0; +} + +/* + * Fetch translation type for specific device. Returns <0 if error + * happens, otherwise return the shifted type to check against + * VTD_CONTEXT_TT_*. 
+ */ +static int vtd_dev_get_trans_type(VTDAddressSpace *as) +{ + IntelIOMMUState *s; + VTDContextEntry ce; + int ret; + + s = as->iommu_state; + + ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), + as->devfn, &ce); + if (ret) { + return ret; + } + + return vtd_ce_get_type(&ce); +} + +static bool vtd_dev_pt_enabled(VTDAddressSpace *as) +{ + int ret; + + assert(as); + + ret = vtd_dev_get_trans_type(as); + if (ret < 0) { + /* + * Possibly failed to parse the context entry for some reason + * (e.g., during init, or any guest configuration errors on + * context entries). We should assume PT not enabled for + * safety. + */ + return false; + } + + return ret == VTD_CONTEXT_TT_PASS_THROUGH; +} + +/* Return whether the device is using IOMMU translation. */ +static bool vtd_switch_address_space(VTDAddressSpace *as) +{ + bool use_iommu; + + assert(as); + + use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as); + + trace_vtd_switch_address_space(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + use_iommu); + + /* Turn off first then on the other */ + if (use_iommu) { + memory_region_set_enabled(&as->sys_alias, false); + memory_region_set_enabled(&as->iommu, true); } else { - switch (ce->lo & VTD_CONTEXT_ENTRY_TT) { - case VTD_CONTEXT_TT_MULTI_LEVEL: - /* fall through */ - case VTD_CONTEXT_TT_DEV_IOTLB: - break; - default: - trace_vtd_ce_invalid(ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_INV; + memory_region_set_enabled(&as->iommu, false); + memory_region_set_enabled(&as->sys_alias, true); + } + + return use_iommu; +} + +static void vtd_switch_address_space_all(IntelIOMMUState *s) +{ + GHashTableIter iter; + VTDBus *vtd_bus; + int i; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { + if (!vtd_bus->dev_as[i]) { + continue; + } + vtd_switch_address_space(vtd_bus->dev_as[i]); } } - return 0; } static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) @@ -915,6 +1056,31 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) +{ + VTDBus *vtd_bus; + VTDAddressSpace *vtd_as; + bool success = false; + + vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); + if (!vtd_bus) { + goto out; + } + + vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + if (!vtd_as) { + goto out; + } + + if (vtd_switch_address_space(vtd_as) == false) { + /* We switched off IOMMU region successfully. */ + success = true; + } + +out: + trace_vtd_pt_enable_fast_path(source_id, success); +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. * @@ -986,6 +1152,30 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + /* + * We don't need to translate for pass-through context entries. + * Also, let's ignore IOTLB caching as well for PT devices. + */ + if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) { + entry->translated_addr = entry->iova; + entry->addr_mask = VTD_PAGE_SIZE - 1; + entry->perm = IOMMU_RW; + trace_vtd_translate_pt(source_id, entry->iova); + + /* + * When this happens, it means firstly caching-mode is not + * enabled, and this is the first passthrough translation for + * the device. Let's enable the fast path for passthrough. 
+ * + * When passthrough is disabled again for the device, we can + * capture it via the context entry invalidation, then the + * IOMMU region can be swapped back. + */ + vtd_pt_enable_fast_path(s, source_id); + + return; + } + ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, &reads, &writes); if (ret_fr) { @@ -1005,7 +1195,7 @@ out: entry->iova = addr & page_mask; entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; entry->addr_mask = ~page_mask; - entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0); + entry->perm = IOMMU_ACCESS_FLAG(reads, writes); } static void vtd_root_table_setup(IntelIOMMUState *s) @@ -1055,6 +1245,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { vtd_reset_context_cache(s); } + vtd_switch_address_space_all(s); /* * From VT-d spec 6.5.2.1, a global context entry invalidation * should be followed by a IOTLB global invalidation, so we should @@ -1065,29 +1256,6 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) vtd_iommu_replay_all(s); } - -/* Find the VTD address space currently associated with a given bus number, - */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - if (!vtd_bus) { - /* Iterate over the registered buses to find the one - * which currently hold this bus number, and update the bus_num lookup table: - */ - GHashTableIter iter; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - } - return vtd_bus; -} - /* Do a context-cache device-selective invalidation. * @func_mask: FM field after shifting */ @@ -1130,6 +1298,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, VTD_PCI_FUNC(devfn_it)); vtd_as->context_cache_entry.context_cache_gen = 0; /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. + */ + vtd_switch_address_space(vtd_as); + /* * So a device is moving out of (or moving into) a * domain, a replay() suites here to notify all the * IOMMU_NOTIFIER_MAP registers about this change. 
@@ -1361,42 +1534,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); } -static void vtd_switch_address_space(VTDAddressSpace *as) -{ - assert(as); - - trace_vtd_switch_address_space(pci_bus_num(as->bus), - VTD_PCI_SLOT(as->devfn), - VTD_PCI_FUNC(as->devfn), - as->iommu_state->dmar_enabled); - - /* Turn off first then on the other */ - if (as->iommu_state->dmar_enabled) { - memory_region_set_enabled(&as->sys_alias, false); - memory_region_set_enabled(&as->iommu, true); - } else { - memory_region_set_enabled(&as->iommu, false); - memory_region_set_enabled(&as->sys_alias, true); - } -} - -static void vtd_switch_address_space_all(IntelIOMMUState *s) -{ - GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} - /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { @@ -2221,7 +2358,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, } static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; @@ -2243,7 +2380,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, } vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, - is_write, &ret); + flag & IOMMU_WO, &ret); VTD_DPRINTF(MMU, "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), @@ -2844,6 +2981,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_DT; } + if (x86_iommu->pt_supported) { + s->ecap |= VTD_ECAP_PT; + } + if (s->caching_mode) { s->cap |= VTD_CAP_CM; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 29d67075f4..0e73a65bf2 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -187,6 +187,7 @@ /* Interrupt Remapping support */ #define VTD_ECAP_IR (1ULL << 3) #define VTD_ECAP_EIM (1ULL << 4) +#define VTD_ECAP_PT (1ULL << 6) #define VTD_ECAP_MHMV (15ULL << 20) /* CAP_REG */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 816bfa872c..107a34125b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -519,7 +519,7 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val, s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if ((val & 1) && !(oldval & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 04a6980800..72556dad48 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -38,6 +38,8 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 +vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64 
+vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d" # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 23dcd3f039..293caf83ef 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -88,55 +88,23 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } +static Property x86_iommu_properties[] = { + DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false), + DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false), + DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void x86_iommu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = x86_iommu_realize; -} - -static bool x86_iommu_intremap_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->intr_supported; -} - -static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->intr_supported = value; -} - -static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->dt_supported; -} - -static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->dt_supported = value; -} - -static void x86_iommu_instance_init(Object *o) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - - /* By default, do not support IR */ - s->intr_supported = false; - object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get, - x86_iommu_intremap_prop_set, NULL); - s->dt_supported = false; - object_property_add_bool(o, "device-iotlb", - x86_iommu_device_iotlb_prop_get, - x86_iommu_device_iotlb_prop_set, - NULL); + dc->props = x86_iommu_properties; } static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, - .instance_init = x86_iommu_instance_init, .instance_size = sizeof(X86IOMMUState), .class_init = x86_iommu_class_init, .class_size = sizeof(X86IOMMUClass), diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index b1c05ffb86..919f09b694 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1089,11 +1089,14 @@ static void cpu_handle_ioreq(void *opaque) * causes Xen to powerdown the domain. */ if (runstate_is_running()) { + ShutdownCause request; + if (qemu_shutdown_requested_get()) { destroy_hvm_domain(false); } - if (qemu_reset_requested_get()) { - qemu_system_reset(VMRESET_REPORT); + request = qemu_reset_requested_get(); + if (request) { + qemu_system_reset(request); destroy_hvm_domain(true); } } @@ -1395,7 +1398,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...) 
va_end(ap); fprintf(stderr, "Will destroy the domain.\n"); /* destroy the domain */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); } void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index d414288839..c479f827b6 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -226,7 +226,7 @@ static void outport_write(KBDState *s, uint32_t val) s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if (!(val & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } @@ -301,7 +301,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, s->outport &= ~KBD_OUT_A20; break; case KBD_CCMD_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case KBD_CCMD_NO_OP: /* ignore that */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 292fffecd3..ea3516794a 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -357,6 +357,10 @@ static void icp_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_reset, dev); } +static void icp_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_reset, dev); +} static void icp_class_init(ObjectClass *klass, void *data) { @@ -364,6 +368,7 @@ static void icp_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; + dc->unrealize = icp_unrealize; } static const TypeInfo icp_info = { diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index dd93531ae3..14b8f6f6e4 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -42,6 +42,14 @@ static int kernel_xics_fd = -1; +typedef struct KVMEnabledICP { + unsigned long vcpu_id; + QLIST_ENTRY(KVMEnabledICP) node; +} KVMEnabledICP; + +static QLIST_HEAD(, KVMEnabledICP) + kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); + /* * ICP-KVM */ @@ -121,6 +129,8 @@ static void icp_kvm_reset(void *dev) static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); + KVMEnabledICP *enabled_icp; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); int ret; if (kernel_xics_fd == -1) { @@ -132,18 +142,21 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) * which was hot-removed earlier we don't have to renable * KVM_CAP_IRQ_XICS capability again. 
*/ - if (icp->cap_irq_xics_enabled) { - return; + QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) { + if (enabled_icp->vcpu_id == vcpu_id) { + return; + } } - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, - kvm_arch_vcpu_id(cs)); + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id); if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); + error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id, + strerror(errno)); exit(1); } - icp->cap_irq_xics_enabled = true; + enabled_icp = g_malloc(sizeof(*enabled_icp)); + enabled_icp->vcpu_id = vcpu_id; + QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node); } static void icp_kvm_realize(DeviceState *dev, Error **errp) @@ -151,12 +164,18 @@ static void icp_kvm_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_kvm_reset, dev); } +static void icp_kvm_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_kvm_reset, dev); +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICPStateClass *icpc = ICP_CLASS(klass); dc->realize = icp_kvm_realize; + dc->unrealize = icp_kvm_unrealize; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; icpc->cpu_setup = icp_kvm_cpu_setup; diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index f05308b897..d98ea8b130 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -229,7 +229,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) +void xics_spapr_init(sPAPRMachineState *spapr) { /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); @@ -243,7 +243,6 @@ int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - return 0; } #define ICS_IRQ_FREE(ics, srcno) \ diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c index 5cf1caa88a..afafe1400f 100644 --- a/hw/ipmi/ipmi.c +++ b/hw/ipmi/ipmi.c @@ -44,14 +44,14 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) if (checkonly) { return 0; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return 0; case IPMI_POWEROFF_CHASSIS: if (checkonly) { return 0; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return 0; case IPMI_SEND_NMI: diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index e2215dcf4d..ac8416d42b 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -606,7 +606,7 @@ static void ich9_rst_cnt_write(void *opaque, hwaddr addr, uint64_t val, ICH9LPCState *lpc = opaque; if (val & 4) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } lpc->rst_cnt = val & 0xA; /* keep FULL_RST (bit 3) and SYS_RST (bit 1) */ diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 83f7b82386..53d1e0ce45 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -232,7 +232,7 @@ static void boston_platreg_write(void *opaque, hwaddr addr, break; case PLAT_SOFTRST_CTL: if (val & PLAT_SOFTRST_CTL_SYSRESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 
5dd177e961..7814c39654 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -470,7 +470,7 @@ static void malta_fpga_write(void *opaque, hwaddr addr, /* SOFTRES Register */ case 0x00500: if (val == 0x42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; /* BRKRES Register */ diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c index 748586ed77..f4de9fc343 100644 --- a/hw/mips/mips_r4k.c +++ b/hw/mips/mips_r4k.c @@ -53,9 +53,9 @@ static void mips_qemu_write (void *opaque, hwaddr addr, uint64_t val, unsigned size) { if ((addr & 0xffff) == 0 && val == 42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); else if ((addr & 0xffff) == 4 && val == 42) - qemu_system_shutdown_request (); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static uint64_t mips_qemu_read (void *opaque, hwaddr addr, diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c index 8524008708..b20b44ea20 100644 --- a/hw/misc/arm_sysctl.c +++ b/hw/misc/arm_sysctl.c @@ -351,13 +351,13 @@ static bool vexpress_cfgctrl_write(arm_sysctl_state *s, unsigned int dcc, break; case SYS_CFG_SHUTDOWN: if (site == SYS_CFG_SITE_MB && device == 0) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } break; case SYS_CFG_REBOOT: if (site == SYS_CFG_SITE_MB && device == 0) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } break; @@ -429,7 +429,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x100) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; @@ -438,7 +438,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x04) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/misc/cbus.c b/hw/misc/cbus.c index 0c207e3104..677274ce3e 100644 --- a/hw/misc/cbus.c +++ b/hw/misc/cbus.c @@ -356,7 +356,7 @@ static inline void retu_write(CBusRetu *s, int reg, uint16_t val) case RETU_REG_WATCHDOG: if (val == 0 && (s->cc[0] & 2)) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case RETU_REG_TXCR: diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index 05c02fb3a4..008d8bd4d5 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -612,7 +612,7 @@ static bool cuda_cmd_powerdown(CUDAState *s, return false; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } @@ -624,7 +624,7 @@ static bool cuda_cmd_reset_system(CUDAState *s, return false; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } diff --git a/hw/misc/slavio_misc.c b/hw/misc/slavio_misc.c index edd5de0702..18ff677512 100644 --- a/hw/misc/slavio_misc.c +++ b/hw/misc/slavio_misc.c @@ -258,7 +258,7 @@ static void slavio_aux2_mem_writeb(void *opaque, hwaddr addr, val &= AUX2_PWROFF; s->aux2 = val; if (val & AUX2_PWROFF) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); slavio_misc_update_irq(s); } @@ -338,7 +338,7 @@ static void slavio_sysctrl_mem_writel(void *opaque, hwaddr addr, case 0: if (val & SYS_RESET) { s->sysctrl = SYS_RESETSTAT; - qemu_system_reset_request(); + 
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c index 7891219001..44304d48be 100644 --- a/hw/misc/zynq_slcr.c +++ b/hw/misc/zynq_slcr.c @@ -405,7 +405,7 @@ static void zynq_slcr_write(void *opaque, hwaddr offset, switch (offset) { case PSS_RST_CTRL: if (val & R_PSS_RST_CTRL_SOFT_RST) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 98bd683f31..9a3d769aa2 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -589,7 +589,15 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, if (!get_vhost_net(nc->peer)) { return features; } - return vhost_net_get_features(get_vhost_net(nc->peer), features); + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + return features; } static uint64_t virtio_net_bad_features(VirtIODevice *vdev) @@ -640,6 +648,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) VirtIONet *n = VIRTIO_NET(vdev); int i; + if (n->mtu_bypass_backend && + !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { + features &= ~(1ULL << VIRTIO_NET_F_MTU); + } + virtio_net_set_multiqueue(n, virtio_has_feature(features, VIRTIO_NET_F_MQ)); @@ -2093,6 +2106,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index edc88f4c65..326f5ef024 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -209,7 +209,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) /* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { IOMMUState *is = container_of(iommu, IOMMUState, iommu); hwaddr baseaddr, offset; @@ -482,9 +482,9 @@ static void apb_config_writel (void *opaque, hwaddr addr, s->reset_control |= val & RESET_WMASK; if (val & SOFT_POR) { s->nr_resets = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (val & SOFT_XIR) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 85a3bb0dd2..89133a9dd3 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -269,7 +269,7 @@ static void bonito_writel(void *opaque, hwaddr addr, } s->regs[saddr] = val; if (reset) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case BONITO_INTENSET: diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 2d02de12d9..4ce201ea65 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -632,7 +632,7 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) PIIX3State *d = opaque; if (val & 4) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } d->rcr = val & 2; /* keep System Reset type only */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 
f7df2388c1..62f1857206 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -774,7 +774,7 @@ static qemu_irq *ppce500_init_mpic(MachineState *machine, PPCE500Params *params, static void ppce500_power_off(void *opaque, int line, int on) { if (on) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c index ba69178d69..ce1254b5d4 100644 --- a/hw/ppc/mpc8544_guts.c +++ b/hw/ppc/mpc8544_guts.c @@ -98,7 +98,7 @@ static void mpc8544_guts_write(void *opaque, hwaddr addr, switch (addr) { case MPC8544_GUTS_ADDR_RSTCR: if (value & MPC8544_GUTS_RSTCR_RESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 5f93083d4a..224184d66d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -412,7 +412,7 @@ static void ppce500_set_irq(void *opaque, int pin, int level) if (level) { LOG_IRQ("%s: reset the PowerPC system\n", __func__); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index d5df94aa6e..fc32e96bf4 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -1807,7 +1807,7 @@ void ppc40x_chip_reset(PowerPCCPU *cpu) void ppc40x_system_reset(PowerPCCPU *cpu) { printf("Reset PowerPC system\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void store_40x_dbcr0 (CPUPPCState *env, uint32_t val) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0980d733cd..ab3aab1279 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -101,21 +101,26 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) { - Error *err = NULL, *local_err = NULL; + Error *local_err = NULL; Object *obj; obj = object_new(type_ics); - object_property_add_child(OBJECT(spapr), "ics", obj, NULL); + object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); - object_property_set_int(obj, nr_irqs, "nr-irqs", &err); + object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err); + if (local_err) { + goto error; + } object_property_set_bool(obj, true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return NULL; + if (local_err) { + goto error; } return ICS_SIMPLE(obj); + +error: + error_propagate(errp, local_err); + return NULL; } static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) @@ -123,25 +128,24 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); if (kvm_enabled()) { - Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, errp)) { spapr->icp_type = TYPE_KVM_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err); + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp); } if (machine_kernel_irqchip_required(machine) && !spapr->ics) { - error_reportf_err(err, - "kernel_irqchip requested but unavailable: "); - } else { - error_free(err); + error_prepend(errp, "kernel_irqchip requested but unavailable: "); + return; } } if (!spapr->ics) { - xics_spapr_init(spapr, errp); + xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); + if (!spapr->ics) 
{ + return; + } } } @@ -1222,16 +1226,21 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) return shift; } +void spapr_free_hpt(sPAPRMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, Error **errp) { long rc; /* Clean up any HPT info from a previous boot */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); rc = kvmppc_reset_htab(shift); if (rc < 0) { @@ -2050,6 +2059,7 @@ static void ppc_spapr_init(MachineState *machine) msi_nonbroken = true; QLIST_INIT(&spapr->phbs); + QTAILQ_INIT(&spapr->pending_dimm_unplugs); /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2569,20 +2579,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, uint64_t align = memory_region_get_alignment(mr); uint64_t size = memory_region_size(mr); uint64_t addr; - char *mem_dev; - - if (size % SPAPR_MEMORY_BLOCK_SIZE) { - error_setg(&local_err, "Hotplugged memory size must be a multiple of " - "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); - goto out; - } - - mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); - if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { - error_setg(&local_err, "Memory backend has bad page size. " - "Use 'memory-backend-file' with correct mem-path."); - goto out; - } pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { @@ -2603,56 +2599,121 @@ out: error_propagate(errp, local_err); } -typedef struct sPAPRDIMMState { +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + char *mem_dev; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); + if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { + error_setg(errp, "Memory backend has bad page size. " + "Use 'memory-backend-file' with correct mem-path."); + return; + } +} + +struct sPAPRDIMMState { + PCDIMMDevice *dimm; uint32_t nr_lmbs; -} sPAPRDIMMState; + QTAILQ_ENTRY(sPAPRDIMMState) next; +}; -static void spapr_lmb_release(DeviceState *dev, void *opaque) +static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s, + PCDIMMDevice *dimm) { - sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; - HotplugHandler *hotplug_ctrl; + sPAPRDIMMState *dimm_state = NULL; - if (--ds->nr_lmbs) { - return; + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } } + return dimm_state; +} - g_free(ds); +static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm)); + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next); +} - /* - * Now that all the LMBs have been removed by the guest, call the - * pc-dimm unplug handler to cleanup up the pc-dimm device. 
- */ - hotplug_ctrl = qdev_get_hotplug_handler(dev); - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); } -static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, - Error **errp) +static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, + PCDIMMDevice *dimm) { sPAPRDRConnector *drc; - sPAPRDRConnectorClass *drck; + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; int i; - sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); - uint64_t addr = addr_start; + sPAPRDIMMState *ds; - ds->nr_lmbs = nr_lmbs; + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr / SPAPR_MEMORY_BLOCK_SIZE); + addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_lmb_release, ds, errp); + if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) { + avail_lmbs++; + } addr += SPAPR_MEMORY_BLOCK_SIZE; } - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr_start / SPAPR_MEMORY_BLOCK_SIZE); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, - nr_lmbs, - drck->get_index(drc)); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = avail_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(ms, ds); + return ds; +} + +/* Callback to be called during DRC release. */ +void spapr_lmb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + if (ds->nr_lmbs) { + return; + } + } else if (--ds->nr_lmbs) { + return; + } + + spapr_pending_dimm_unplugs_remove(spapr, ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. 
+ */ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -2670,19 +2731,47 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev); Error *local_err = NULL; PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); uint64_t size = memory_region_size(mr); - uint64_t addr; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr_start, addr; + int i; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + sPAPRDIMMState *ds; - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); if (local_err) { goto out; } - spapr_del_lmbs(dev, addr, size, &error_abort); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(spapr, ds); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); out: error_propagate(errp, local_err); } @@ -2715,11 +2804,13 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); } -static void spapr_core_release(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { HotplugHandler *hotplug_ctrl; @@ -2752,7 +2843,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + drck->detach(drc, dev, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -2853,7 +2944,13 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - if (cc->nr_threads != smp_threads) { + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. 
+ */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); return; @@ -2990,7 +3087,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_pre_plug(hotplug_dev, dev, errp); } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index a17ea07ef1..ff7058ecc0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -143,29 +143,30 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) Object *obj; obj = object_new(spapr->icp_type); - object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_child(OBJECT(cpu), "icp", obj, &error_abort); + object_unref(obj); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - error_propagate(errp, local_err); - return; + goto error; } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } spapr_cpu_init(spapr, cpu, &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj)); + return; + +error: + object_unparent(obj); + error_propagate(errp, local_err); } static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9fa5545991..cc2400bcd5 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -20,6 +20,7 @@ #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ #include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" @@ -99,8 +100,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, if (drc->awaiting_release) { if (drc->configured) { trace_spapr_drc_set_isolation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else { trace_spapr_drc_set_isolation_state_deferring(get_index(drc)); } @@ -153,8 +153,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, if (drc->awaiting_release && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { trace_spapr_drc_set_allocation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { drc->awaiting_allocation = false; } @@ -404,15 +403,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -static void detach(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp) +static void detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) { trace_spapr_drc_detach(get_index(drc)); - drc->detach_cb = detach_cb; - drc->detach_cb_opaque = detach_cb_opaque; - /* if 
we've signalled device presence to the guest, or if the guest * has gone ahead and configured the device (via manually-executed * device add via drmgr in guest, namely), we need to wait @@ -456,8 +450,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; - if (drc->detach_cb) { - drc->detach_cb(drc->dev, drc->detach_cb_opaque); + /* Calling release callbacks based on drc->type. */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + spapr_core_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PCI: + spapr_phb_remove_pci_device_cb(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + spapr_lmb_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); } drc->awaiting_release = false; @@ -467,8 +474,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->fdt_start_offset = 0; object_property_del(OBJECT(drc), "device", NULL); drc->dev = NULL; - drc->detach_cb = NULL; - drc->detach_cb_opaque = NULL; } static bool release_pending(sPAPRDRConnector *drc) @@ -498,8 +503,7 @@ static void reset(DeviceState *d) * force removal if we are */ if (drc->awaiting_release) { - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } /* non-PCI devices may be awaiting a transition to UNUSABLE */ @@ -515,6 +519,60 @@ static void reset(DeviceState *d) } } +static bool spapr_drc_needed(void *opaque) +{ + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool rc = false; + sPAPRDREntitySense value; + drck->entity_sense(drc, &value); + + /* If no dev is plugged in there is no need to migrate the DRC state */ + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { + return false; + } + + /* + * If there is dev plugged in, we need to migrate the DRC state when + * it is different from cold-plugged state + */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + case SPAPR_DR_CONNECTOR_TYPE_LMB: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); + } + return rc; +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), + VMSTATE_BOOL(configured, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_BOOL(signalled, sPAPRDRConnector), + VMSTATE_END_OF_LIST() + } +}; + static void realize(DeviceState *d, Error **errp) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); @@ -543,6 +601,8 @@ static void realize(DeviceState *d, Error **errp) object_unref(OBJECT(drc)); } g_free(child_name); + vmstate_register(DEVICE(drc), drck->get_index(drc), 
&vmstate_spapr_drc, + drc); trace_spapr_drc_realize_complete(drck->get_index(drc)); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f0b28d8112..73e2a1884f 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data, bool exception) +static void rtas_event_log_queue(int log_type, void *data) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); entry->log_type = log_type; - entry->exception = exception; entry->data = data; QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, - bool exception) +static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { break; } @@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, return entry; } -static bool rtas_event_log_contains(uint32_t event_mask, bool exception) +static bool rtas_event_log_contains(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { return true; } @@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask, true); + event = rtas_event_log_dequeue(mask); if (!event) { goto out_no_events; } @@ -690,7 +680,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * interrupts. 
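Note: with the exception flag gone from the RTAS event log, a queued entry carries nothing but its log type and payload. The structure itself lives in the sPAPR headers rather than in this hunk; after this change it presumably reduces to roughly:

    typedef struct sPAPREventLogEntry {
        int log_type;                            /* RTAS_LOG_TYPE_EPOW, RTAS_LOG_TYPE_HOTPLUG, ... */
        void *data;                              /* struct rtas_error_log plus the extended log    */
        QTAILQ_ENTRY(sPAPREventLogEntry) next;   /* linkage on spapr->pending_events               */
    } sPAPREventLogEntry;

rtas_event_log_queue()/dequeue()/contains() now operate on this single list, keyed only by the event-source mask.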
*/ for (i = 0; i < EVENT_CLASS_MAX; i++) { - if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { const sPAPREventSource *source = spapr_event_sources_get_source(spapr->event_sources, i); @@ -710,38 +700,10 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - uint32_t mask, buf, len, event_len; - sPAPREventLogEntry *event; - struct rtas_error_log *hdr; - if (nargs != 4 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - - mask = rtas_ld(args, 0); - buf = rtas_ld(args, 2); - len = rtas_ld(args, 3); - - event = rtas_event_log_dequeue(mask, false); - if (!event) { - goto out_no_events; - } - - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); - - if (event_len < len) { - len = event_len; - } - - cpu_physical_memory_write(buf, event->data, len); - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); - g_free(event); - return; - -out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0d608d6e28..aae5a62a61 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -913,10 +913,7 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, /* We assume RADIX, so this catches all the "Do Nothing" cases */ } else if (!(patbe_old & PATBE1_GR)) { /* HASH->RADIX : Free HPT */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); } else if (!(patbe_new & PATBE1_GR)) { /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ spapr_setup_hpt_and_vrma(spapr); @@ -1047,19 +1044,13 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - sPAPRMachineState *spapr, - target_ulong opcode, - target_ulong *args) +static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, + Error **errp) { - target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table; bool explicit_match = false; /* Matched the CPU's real PVR */ uint32_t max_compat = cpu->max_compat; uint32_t best_compat = 0; int i; - sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; - bool guest_radix; /* * We scan the supplied table of PVRs looking for two things @@ -1069,9 +1060,9 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, for (i = 0; i < 512; ++i) { uint32_t pvr, pvr_mask; - pvr_mask = ldl_be_phys(&address_space_memory, list); - pvr = ldl_be_phys(&address_space_memory, list + 4); - list += 8; + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; if (~pvr_mask & pvr) { break; /* Terminator record */ @@ -1090,17 +1081,38 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* We couldn't find a suitable compatibility mode, and either * the guest doesn't support "raw" mode for this CPU, or raw * mode is disabled because a maximum compat mode is set */ - return H_HARDWARE; + error_setg(errp, "Couldn't negotiate a suitable PVR during CAS"); + return 0; } /* Parsing finished */ trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); - /* Update CPUs */ - if (cpu->compat_pvr != best_compat) { - Error *local_err = NULL; + return best_compat; +} - ppc_set_compat_all(best_compat, &local_err); +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, 
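Note: spapr_free_hpt() is introduced elsewhere in this series and is not part of the hunk above; judging from the inline code it replaces, it presumably amounts to:

    void spapr_free_hpt(sPAPRMachineState *spapr)
    {
        g_free(spapr->htab);
        spapr->htab = NULL;
        spapr->htab_shift = 0;
        close_htab_fd(spapr);
    }

so the HASH-to-RADIX transition and the other tear-down paths share one helper instead of open-coding the HPT release.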
+ target_ulong *args) +{ + /* Working address in data buffer */ + target_ulong addr = ppc64_phys_to_real(args[0]); + target_ulong ov_table; + uint32_t cas_pvr; + sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; + bool guest_radix; + Error *local_err = NULL; + + cas_pvr = cas_check_pvr(cpu, &addr, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + ppc_set_compat_all(cas_pvr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; @@ -1108,7 +1120,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, } /* For the future use: here @ov_table points to the first option vector */ - ov_table = list; + ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); ov5_guest = spapr_ovec_parse_vector(ov_table, 5); @@ -1162,7 +1174,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_cleanup(ov5_updates); if (spapr->cas_reboot) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { /* If ppc_spapr_reset() did not set up a HPT but one is necessary * (because the guest isn't going to use radix) then set it up here. */ diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 29c80bb3c8..0341bc069d 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -111,7 +111,7 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); uint64_t tce; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a7cff32bbf..e4daf8d5f1 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1369,7 +1369,8 @@ out: } } -static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. 
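For reference, the table scanned by cas_check_pvr() is supplied by the guest as pairs of 32-bit words, (pvr_mask, pvr), read with ldl_be_phys(); scanning stops at the first entry whose pvr has bits set outside its mask. An illustrative buffer (values hypothetical, chosen to resemble logical PVRs; shown host-order for readability, while in guest memory each word is big-endian):

    /* Two candidate entries followed by a terminator record. */
    static const uint32_t cas_pvr_list[] = {
        0xffffffff, 0x0f000004,   /* candidate: a logical (architected) PVR */
        0xffffffff, 0x0f000003,   /* candidate: an older logical PVR        */
        0x00000000, 0xffffffff,   /* terminator: ~pvr_mask & pvr != 0       */
    };

cas_check_pvr() records the best architected match (and whether the CPU's real PVR was listed), and the caller then applies it via ppc_set_compat_all().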
*/ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) { /* some version guests do not wait for completion of a device * cleanup (generally done asynchronously by the kernel) before @@ -1392,7 +1393,7 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); + drck->detach(drc, DEVICE(pdev), errp); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 619f32c054..128d993d04 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -110,7 +110,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); cpu_stop_current(); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -124,7 +124,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c index a7a5b412e4..6e6eee4e90 100644 --- a/hw/s390x/3270-ccw.c +++ b/hw/s390x/3270-ccw.c @@ -98,9 +98,13 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp) EmulatedCcw3270Class *ck = EMULATED_CCW_3270_GET_CLASS(dev); CcwDevice *cdev = CCW_DEVICE(ds); CCWDeviceClass *cdk = CCW_DEVICE_GET_CLASS(cdev); - SubchDev *sch = css_create_virtual_sch(cdev->devno, errp); + DeviceState *parent = DEVICE(cdev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(cdev->devno, true, cbus->squash_mcss, errp); if (!sch) { return; } diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 36bd4b1645..a8e5575a8a 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -14,3 +14,4 @@ obj-y += ccw-device.o obj-y += s390-pci-bus.o s390-pci-inst.o obj-y += s390-skeys.o obj-$(CONFIG_KVM) += s390-skeys-kvm.o +obj-y += s390-ccw.o diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index b54ac01d37..823747fcd7 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -17,6 +17,7 @@ #include "hw/s390x/css.h" #include "ccw-device.h" #include "hw/s390x/css-bridge.h" +#include "cpu.h" /* * Invoke device-specific unplug handler, disable the subchannel @@ -103,6 +104,7 @@ VirtualCssBus *virtual_css_bus_init(void) /* Create bus on bridge device */ bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); cbus = VIRTUAL_CSS_BUS(bus); + cbus->squash_mcss = s390_get_squash_mcss(); /* Enable hotplugging */ qbus_set_hotplug_handler(bus, dev, &error_abort); diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 15c4f4b249..1e2f26b65a 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -13,6 +13,7 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/qdev.h" +#include "qemu/error-report.h" #include "qemu/bitops.h" #include "exec/address-spaces.h" #include "cpu.h" @@ -258,7 +259,7 @@ uint16_t css_build_subchannel_id(SubchDev *sch) return css_do_build_subchannel_id(sch->cssid, sch->ssid); } -static void css_inject_io_interrupt(SubchDev *sch) +void css_inject_io_interrupt(SubchDev *sch) { uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; @@ -523,7 +524,7 @@ static int 
css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr, return ret; } -static void sch_handle_start_func(SubchDev *sch, ORB *orb) +static void sch_handle_start_func_virtual(SubchDev *sch, ORB *orb) { PMCW *p = &sch->curr_status.pmcw; @@ -625,13 +626,58 @@ static void sch_handle_start_func(SubchDev *sch, ORB *orb) } +static int sch_handle_start_func_passthrough(SubchDev *sch, ORB *orb) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + int ret; + + if (!(s->ctrl & SCSW_ACTL_SUSP)) { + assert(orb != NULL); + p->intparm = orb->intparm; + } + + /* + * Only support prefetch enable mode. + * Only support 64bit addressing idal. + */ + if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) || + !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) { + return -EINVAL; + } + + ret = s390_ccw_cmd_request(orb, s, sch->driver_data); + switch (ret) { + /* Currently we don't update control block and just return the cc code. */ + case 0: + break; + case -EBUSY: + break; + case -ENODEV: + break; + case -EACCES: + /* Let's reflect an inaccessible host device by cc 3. */ + ret = -ENODEV; + break; + default: + /* + * All other return codes will trigger a program check, + * or set cc to 1. + */ + break; + }; + + return ret; +} + /* * On real machines, this would run asynchronously to the main vcpus. * We might want to make some parts of the ssch handling (interpreting * read/writes) asynchronous later on if we start supporting more than * our current very simple devices. */ -static void do_subchannel_work(SubchDev *sch, ORB *orb) +int do_subchannel_work_virtual(SubchDev *sch, ORB *orb) { SCSW *s = &sch->curr_status.scsw; @@ -642,12 +688,45 @@ static void do_subchannel_work(SubchDev *sch, ORB *orb) sch_handle_halt_func(sch); } else if (s->ctrl & SCSW_FCTL_START_FUNC) { /* Triggered by both ssch and rsch. */ - sch_handle_start_func(sch, orb); + sch_handle_start_func_virtual(sch, orb); } else { /* Cannot happen. */ - return; + return 0; } css_inject_io_interrupt(sch); + return 0; +} + +int do_subchannel_work_passthrough(SubchDev *sch, ORB *orb) +{ + int ret; + SCSW *s = &sch->curr_status.scsw; + + if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) { + /* TODO: Clear handling */ + sch_handle_clear_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) { + /* TODO: Halt handling */ + sch_handle_halt_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_START_FUNC) { + ret = sch_handle_start_func_passthrough(sch, orb); + } else { + /* Cannot happen. 
*/ + return -ENODEV; + } + + return ret; +} + +static int do_subchannel_work(SubchDev *sch, ORB *orb) +{ + if (sch->do_subchannel_work) { + return sch->do_subchannel_work(sch, orb); + } else { + return -EINVAL; + } } static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) @@ -670,7 +749,7 @@ static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) dest->chars = cpu_to_be32(src->chars); } -static void copy_scsw_to_guest(SCSW *dest, const SCSW *src) +void copy_scsw_to_guest(SCSW *dest, const SCSW *src) { dest->flags = cpu_to_be16(src->flags); dest->ctrl = cpu_to_be16(src->ctrl); @@ -966,8 +1045,7 @@ int css_do_ssch(SubchDev *sch, ORB *orb) s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND); s->flags &= ~SCSW_FLAGS_MASK_PNO; - do_subchannel_work(sch, orb); - ret = 0; + ret = do_subchannel_work(sch, orb); out: return ret; @@ -1326,7 +1404,8 @@ unsigned int css_find_free_chpid(uint8_t cssid) return MAX_CHPID + 1; } -static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) +static int css_add_chpid(uint8_t cssid, uint8_t chpid, uint8_t type, + bool is_virt) { CssImage *css; @@ -1340,7 +1419,7 @@ static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) } css->chpids[chpid].in_use = 1; css->chpids[chpid].type = type; - css->chpids[chpid].is_virtual = 1; + css->chpids[chpid].is_virtual = is_virt; css_generate_chp_crws(cssid, chpid); @@ -1364,7 +1443,7 @@ void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type) p->pam = 0x80; p->chpid[0] = chpid; if (!css->chpids[chpid].in_use) { - css_add_virtual_chpid(sch->cssid, chpid, type); + css_add_chpid(sch->cssid, chpid, type, true); } memset(s, 0, sizeof(SCSW)); @@ -1946,28 +2025,59 @@ PropertyInfo css_devid_ro_propinfo = { .get = get_css_devid, }; -SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) +SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss, + Error **errp) { uint16_t schid = 0; SubchDev *sch; if (bus_id.valid) { - /* Enforce use of virtual cssid. */ - if (bus_id.cssid != VIRTUAL_CSSID) { - error_setg(errp, "cssid %hhx not valid for virtual devices", - bus_id.cssid); + if (is_virtual != (bus_id.cssid == VIRTUAL_CSSID)) { + error_setg(errp, "cssid %hhx not valid for %s devices", + bus_id.cssid, + (is_virtual ? 
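Note: do_subchannel_work() now dispatches through a per-subchannel hook instead of calling a fixed handler. The SubchDev member is declared in the css header rather than in this hunk, but from the call sites it has to look roughly like:

    /* In struct SubchDev (hw/s390x/css.h) -- sketch inferred from the callers: */
    int (*do_subchannel_work)(SubchDev *sch, ORB *orb);

As the realize functions later in this patch show, emulated subchannels point this at do_subchannel_work_virtual(), while passthrough devices use do_subchannel_work_passthrough() and stash their S390CCWDevice in sch->driver_data for the request path.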
"virtual" : "non-virtual")); return NULL; } + } + + if (bus_id.valid) { + if (squash_mcss) { + bus_id.cssid = channel_subsys.default_cssid; + } else if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + if (!css_find_free_subch_for_devno(bus_id.cssid, bus_id.ssid, bus_id.devid, &schid, errp)) { return NULL; } - } else { - bus_id.cssid = VIRTUAL_CSSID; + } else if (squash_mcss || is_virtual) { + bus_id.cssid = channel_subsys.default_cssid; + if (!css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, &bus_id.devid, &schid, errp)) { return NULL; } + } else { + for (bus_id.cssid = 0; bus_id.cssid < MAX_CSSID; ++bus_id.cssid) { + if (bus_id.cssid == VIRTUAL_CSSID) { + continue; + } + + if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + + if (css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, + &bus_id.devid, &schid, + NULL)) { + break; + } + if (bus_id.cssid == MAX_CSSID) { + error_setg(errp, "Virtual channel subsystem is full!"); + return NULL; + } + } } sch = g_malloc0(sizeof(*sch)); @@ -1978,3 +2088,147 @@ SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) css_subch_assign(sch->cssid, sch->ssid, schid, sch->devno, sch); return sch; } + +static int css_sch_get_chpids(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t chpid[8]; + int i; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/chpids", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x %x %x %x %x %x", + &chpid[0], &chpid[1], &chpid[2], &chpid[3], + &chpid[4], &chpid[5], &chpid[6], &chpid[7]) != 8) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + p->chpid[i] = chpid[i]; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_path_masks(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t pim, pam, pom; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/pimpampom", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x", &pim, &pam, &pom) != 3) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + p->pim = pim; + p->pam = pam; + p->pom = pom; + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_chpid_type(uint8_t chpid, uint32_t *type, + CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + + fid_path = g_strdup_printf("/sys/devices/css%x/chp0.%02x/type", + dev_id->cssid, chpid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x", type) != 1) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +/* + * We currently retrieve the real device information from sysfs to build the + * guest subchannel information block without considering the migration feature. + * We need to revisit this problem when we want to add migration support. 
+ */ +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id) +{ + CssImage *css = channel_subsys.css[sch->cssid]; + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + uint32_t type; + int i, ret; + + assert(css != NULL); + memset(p, 0, sizeof(PMCW)); + p->flags |= PMCW_FLAGS_MASK_DNV; + /* We are dealing with I/O subchannels only. */ + p->devno = sch->devno; + + /* Grab path mask from sysfs. */ + ret = css_sch_get_path_masks(sch, dev_id); + if (ret) { + return ret; + } + + /* Grab chpids from sysfs. */ + ret = css_sch_get_chpids(sch, dev_id); + if (ret) { + return ret; + } + + /* Build chpid type. */ + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + if (p->chpid[i] && !css->chpids[p->chpid[i]].in_use) { + ret = css_sch_get_chpid_type(p->chpid[i], &type, dev_id); + if (ret) { + return ret; + } + css_add_chpid(sch->cssid, p->chpid[i], type, false); + } + } + + memset(s, 0, sizeof(SCSW)); + sch->curr_status.mba = 0; + for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) { + sch->curr_status.mda[i] = 0; + } + + return 0; +} diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 75d3c681a4..4e6469db0f 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -396,7 +396,7 @@ void s390_reipl_request(void) S390IPLState *ipl = get_ipl_device(); ipl->reipl_requested = true; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void s390_ipl_prepare_cpu(S390CPU *cpu) diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c new file mode 100644 index 0000000000..8614dda6f8 --- /dev/null +++ b/hw/s390x/s390-ccw.c @@ -0,0 +1,153 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 + * or (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "libgen.h" +#include "hw/s390x/css.h" +#include "hw/s390x/css-bridge.h" +#include "hw/s390x/s390-ccw.h" + +int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data) +{ + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(data); + + if (cdc->handle_request) { + return cdc->handle_request(orb, scsw, data); + } else { + return -ENOSYS; + } +} + +static void s390_ccw_get_dev_info(S390CCWDevice *cdev, + char *sysfsdev, + Error **errp) +{ + unsigned int cssid, ssid, devid; + char dev_path[PATH_MAX] = {0}, *tmp; + + if (!sysfsdev) { + error_setg(errp, "No host device provided"); + error_append_hint(errp, + "Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n"); + return; + } + + if (!realpath(sysfsdev, dev_path)) { + error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev); + return; + } + + cdev->mdevid = g_strdup(basename(dev_path)); + + tmp = basename(dirname(dev_path)); + if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) { + error_setg_errno(errp, errno, "Failed to read %s", tmp); + return; + } + + cdev->hostid.cssid = cssid; + cdev->hostid.ssid = ssid; + cdev->hostid.devid = devid; + cdev->hostid.valid = true; +} + +static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; + int ret; + Error *err = NULL; + + s390_ccw_get_dev_info(cdev, sysfsdev, &err); + if (err) { + goto out_err_propagate; + } + + sch = css_create_sch(ccw_dev->devno, false, cbus->squash_mcss, &err); + if (!sch) { + goto out_mdevid_free; + } + sch->driver_data = cdev; + sch->do_subchannel_work = do_subchannel_work_passthrough; + + ccw_dev->sch = sch; + ret = css_sch_build_schib(sch, &cdev->hostid); + if (ret) { + error_setg_errno(&err, -ret, "%s: Failed to build initial schib", + __func__); + goto out_err; + } + + ck->realize(ccw_dev, &err); + if (err) { + goto out_err; + } + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, + parent->hotplugged, 1); + return; + +out_err: + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + ccw_dev->sch = NULL; + g_free(sch); +out_mdevid_free: + g_free(cdev->mdevid); +out_err_propagate: + error_propagate(errp, err); +} + +static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + SubchDev *sch = ccw_dev->sch; + + if (sch) { + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + g_free(sch); + ccw_dev->sch = NULL; + } + + g_free(cdev->mdevid); +} + +static void s390_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->bus_type = TYPE_VIRTUAL_CSS_BUS; + cdc->realize = s390_ccw_realize; + cdc->unrealize = s390_ccw_unrealize; +} + +static const TypeInfo s390_ccw_info = { + .name = TYPE_S390_CCW, + .parent = TYPE_CCW_DEVICE, + .instance_size = sizeof(S390CCWDevice), + .class_size = sizeof(S390CCWDeviceClass), + .class_init = s390_ccw_class_init, + .abstract = true, +}; + +static void register_s390_ccw_type(void) +{ + type_register_static(&s390_ccw_info); +} + +type_init(register_s390_ccw_type) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 66a6fbeb8c..5651483781 100644 --- 
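Note: s390_ccw_get_dev_info() derives the host subchannel from the mediated device's canonical sysfs path: the last path component becomes cdev->mdevid and the parent directory name is parsed as cssid.ssid.devno. Purely as an illustration (the path and values below are invented, not taken from the patch), a sysfsdev resolving under a parent directory named 0.0.0313 yields hostid 0.0.0313; the parse itself can be exercised in isolation:

    #include <stdio.h>

    int main(void)
    {
        /* stands in for basename(dirname(dev_path)) -- value invented */
        const char *parent = "0.0.0313";
        unsigned int cssid, ssid, devid;

        if (sscanf(parent, "%2x.%1x.%4x", &cssid, &ssid, &devid) == 3) {
            printf("cssid=%x ssid=%x devid=%04x\n", cssid, ssid, devid);
        }
        return 0;
    }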
a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -357,7 +357,7 @@ out: } static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *mr, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { uint64_t pte; uint32_t flags; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 314a9cbad4..8bc7c98682 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -624,7 +624,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) mr = &iommu->iommu_mr; while (start < end) { - entry = mr->iommu_ops->translate(mr, start, 0); + entry = mr->iommu_ops->translate(mr, start, IOMMU_NONE); if (!entry.translated_addr) { pbdev->state = ZPCI_FS_ERROR; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index fdd4384ff0..c9021f2fa9 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -136,10 +136,15 @@ static void ccw_init(MachineState *machine) kvm_s390_enable_css_support(s390_cpu_addr2state(0)); } /* - * Create virtual css and set it as default so that non mcss-e - * enabled guests only see virtio devices. + * Non mcss-e enabled guests only see the devices from the default + * css, which is determined by the value of the squash_mcss property. + * Note: we must not squash non virtual devices to css 0xFE. */ - ret = css_create_css_image(VIRTUAL_CSSID, true); + if (css_bus->squash_mcss) { + ret = css_create_css_image(0, true); + } else { + ret = css_create_css_image(VIRTUAL_CSSID, true); + } assert(ret == 0); /* Create VirtIO network adapters */ @@ -303,6 +308,20 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ms->loadparm[i] = ' '; /* pad right with spaces */ } } +static inline bool machine_get_squash_mcss(Object *obj, Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + return ms->s390_squash_mcss; +} + +static inline void machine_set_squash_mcss(Object *obj, bool value, + Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + ms->s390_squash_mcss = value; +} static inline void s390_machine_initfn(Object *obj) { @@ -328,6 +347,13 @@ static inline void s390_machine_initfn(Object *obj) " to upper case) to pass to machine loader, boot manager," " and guest kernel", NULL); + object_property_add_bool(obj, "s390-squash-mcss", + machine_get_squash_mcss, + machine_set_squash_mcss, NULL); + object_property_set_description(obj, "s390-squash-mcss", + "enable/disable squashing subchannels into the default css", + NULL); + object_property_set_bool(obj, false, "s390-squash-mcss", NULL); } static const TypeInfo ccw_machine_info = { diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index e7167e3d05..e6a6f74be3 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -680,9 +680,13 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev); CcwDevice *ccw_dev = CCW_DEVICE(dev); CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); - SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(ccw_dev->devno, true, cbus->squash_mcss, errp); if (!sch) { return; } @@ -697,6 +701,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->disable_cb = virtio_sch_disable_cb; sch->id.reserved = 0xff; sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->do_subchannel_work = 
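Note: the new s390-squash-mcss machine property is consumed in two places: the machine code above decides which css image to create, and the css bridge earlier in this patch copies it into cbus->squash_mcss via s390_get_squash_mcss(). That accessor is not part of this hunk; a sketch of what it presumably does, assuming it simply reflects the cached machine state:

    bool s390_get_squash_mcss(void)
    {
        /* sketch: report the machine property cached in S390CcwMachineState */
        return S390_CCW_MACHINE(qdev_get_machine())->s390_squash_mcss;
    }

With the property off (the default), virtual devices keep living in the virtual css 0xfe exactly as before.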
do_subchannel_work_virtual; ccw_dev->sch = sch; dev->indicators = NULL; dev->revision = -1; diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 8f520cec1c..e6fc74ed87 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -164,7 +164,7 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size) break; case PA_POWOFF: if (value & 1) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; case PA_VERREG: diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 8e18236c5a..d13bc30b2d 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -207,7 +207,7 @@ static void watchdog_hit(void *opaque) qemu_irq_raise(t->nmi); } else - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); t->wd_hits++; } diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c index 474981a6ac..4a064fbfd2 100644 --- a/hw/timer/m48t59.c +++ b/hw/timer/m48t59.c @@ -1,7 +1,7 @@ /* * QEMU M48T59 and M48T08 NVRAM emulation for PPC PREP and Sparc platforms * - * Copyright (c) 2003-2005, 2007 Jocelyn Mayer + * Copyright (c) 2003-2005, 2007, 2017 Jocelyn Mayer * Copyright (c) 2013 Hervé Poussineau * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -159,7 +159,7 @@ static void watchdog_cb (void *opaque) NVRAM->buffer[0x1FF7] = 0x00; NVRAM->buffer[0x1FFC] &= ~0x40; /* May it be a hw CPU Reset instead ? */ - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { qemu_set_irq(NVRAM->IRQ, 1); qemu_set_irq(NVRAM->IRQ, 0); diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c index 44885907c9..93bc6e1790 100644 --- a/hw/timer/milkymist-sysctl.c +++ b/hw/timer/milkymist-sysctl.c @@ -90,7 +90,7 @@ static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value) trace_milkymist_sysctl_icap_write(value); switch (value & 0xffff) { case 0x000e: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; } } @@ -195,7 +195,7 @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value, s->regs[addr] = 1; break; case R_SYSTEM_ID: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case R_GPIO_IN: diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c index 59002b407e..68ba5a70b3 100644 --- a/hw/timer/pxa2xx_timer.c +++ b/hw/timer/pxa2xx_timer.c @@ -401,7 +401,7 @@ static void pxa2xx_timer_tick(void *opaque) if (t->num == 3) if (i->reset3 & 1) { i->reset3 = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 5958be8ce3..97f1c4561a 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -8,6 +8,7 @@ common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o hcd-ehci-pci.o common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o +common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o obj-$(CONFIG_TUSB6010) += tusb6010.o diff --git a/hw/usb/core.c b/hw/usb/core.c index 45fa00c517..241ae66b15 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -98,6 +98,14 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) USBDevice *dev = ep->dev; USBBus *bus = usb_bus_from_device(dev); + if (!qdev_hotplug) { + /* + * This is machine init cold plug. No need to wakeup anyone, + * all devices will be reset anyway. 
And trying to wakeup can + * cause problems due to hitting uninitialized devices. + */ + return; + } if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { dev->port->ops->wakeup(dev->port); } diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index 47b7519910..e82a6a6c44 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -402,7 +402,20 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p, port->wPortChange &= ~PORT_STAT_C_ENABLE; break; case PORT_SUSPEND: - port->wPortStatus &= ~PORT_STAT_SUSPEND; + if (port->wPortStatus & PORT_STAT_SUSPEND) { + port->wPortStatus &= ~PORT_STAT_SUSPEND; + + /* + * USB Spec rev2.0 11.24.2.7.2.3 C_PORT_SUSPEND + * "This bit is set on the following transitions: + * - On transition from the Resuming state to the + * SendEOP [sic] state" + * + * Note that this includes both remote wake-up and + * explicit ClearPortFeature(PORT_SUSPEND). + */ + port->wPortChange |= PORT_STAT_C_SUSPEND; + } break; case PORT_C_SUSPEND: port->wPortChange &= ~PORT_STAT_C_SUSPEND; diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index 83a4f0e6fb..76ceca1f5c 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -516,27 +516,16 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) char label[32]; static int index; - while (*filename && *filename != ':') { - const char *p; - - if (strstart(filename, "vendorid=", &p)) { - error_report("vendorid is not supported anymore"); - return NULL; - } else if (strstart(filename, "productid=", &p)) { - error_report("productid is not supported anymore"); - return NULL; - } else { - error_report("unrecognized serial USB option %s", filename); - return NULL; - } - while(*filename == ',') - filename++; + if (*filename == ':') { + filename++; + } else if (*filename) { + error_report("unrecognized serial USB option %s", filename); + return NULL; } if (!*filename) { error_report("character device specification needed"); return NULL; } - filename++; snprintf(label, sizeof(label), "usbserial%d", index++); cdrv = qemu_chr_new(label, filename); diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 50ef817f93..17c572c55f 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2232,13 +2232,13 @@ static void ehci_update_frindex(EHCIState *ehci, int uframes) ehci->frindex = (ehci->frindex + uframes) % 0x4000; } -static void ehci_frame_timer(void *opaque) +static void ehci_work_bh(void *opaque) { EHCIState *ehci = opaque; int need_timer = 0; int64_t expire_time, t_now; uint64_t ns_elapsed; - int uframes, skipped_uframes; + uint64_t uframes, skipped_uframes; int i; t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -2324,6 +2324,13 @@ static void ehci_frame_timer(void *opaque) } } +static void ehci_work_timer(void *opaque) +{ + EHCIState *ehci = opaque; + + qemu_bh_schedule(ehci->async_bh); +} + static const MemoryRegionOps ehci_mmio_caps_ops = { .read = ehci_caps_read, .write = ehci_caps_write, @@ -2478,8 +2485,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) s->ports[i].dev = 0; } - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_frame_timer, s); - s->async_bh = qemu_bh_new(ehci_frame_timer, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); + s->async_bh = qemu_bh_new(ehci_work_bh, s); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-xhci-nec.c b/hw/usb/hcd-xhci-nec.c new file mode 100644 index 0000000000..75715a048a --- /dev/null +++ b/hw/usb/hcd-xhci-nec.c @@ -0,0 +1,63 @@ 
+/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/usb.h" +#include "hw/pci/pci.h" + +#include "hcd-xhci.h" + +static Property nec_xhci_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BIT("superspeed-ports-first", + XHCIState, flags, XHCI_FLAG_SS_FIRST, true), + DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, + XHCI_FLAG_FORCE_PCIE_ENDCAP, false), + DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), + DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nec_xhci_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = nec_xhci_properties; + k->vendor_id = PCI_VENDOR_ID_NEC; + k->device_id = PCI_DEVICE_ID_NEC_UPD720200; + k->revision = 0x03; +} + +static const TypeInfo nec_xhci_info = { + .name = TYPE_NEC_XHCI, + .parent = TYPE_XHCI, + .class_init = nec_xhci_class_init, +}; + +static void nec_xhci_register_types(void) +{ + type_register_static(&nec_xhci_info); +} + +type_init(nec_xhci_register_types) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 77d8e1137a..a0c7960a7b 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -29,6 +29,8 @@ #include "trace.h" #include "qapi/error.h" +#include "hcd-xhci.h" + //#define DEBUG_XHCI //#define DEBUG_DATA @@ -40,16 +42,6 @@ #define FIXME(_msg) do { fprintf(stderr, "FIXME %s:%d %s\n", \ __func__, __LINE__, _msg); abort(); } while (0) -#define MAXPORTS_2 15 -#define MAXPORTS_3 15 - -#define MAXPORTS (MAXPORTS_2+MAXPORTS_3) -#define MAXSLOTS 64 -#define MAXINTRS 16 - -/* Very pessimistic, let's hope it's enough for all cases */ -#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) - #define TRB_LINK_LIMIT 32 #define COMMAND_LIMIT 256 #define TRANSFER_LIMIT 256 @@ -164,84 +156,8 @@ enum { PLS_RESUME = 15, }; -typedef enum TRBType { - TRB_RESERVED = 0, - TR_NORMAL, - TR_SETUP, - TR_DATA, - TR_STATUS, - TR_ISOCH, - TR_LINK, - TR_EVDATA, - TR_NOOP, - CR_ENABLE_SLOT, - CR_DISABLE_SLOT, - CR_ADDRESS_DEVICE, - CR_CONFIGURE_ENDPOINT, - CR_EVALUATE_CONTEXT, - CR_RESET_ENDPOINT, - CR_STOP_ENDPOINT, - CR_SET_TR_DEQUEUE, - CR_RESET_DEVICE, - CR_FORCE_EVENT, - CR_NEGOTIATE_BW, - CR_SET_LATENCY_TOLERANCE, - CR_GET_PORT_BANDWIDTH, - CR_FORCE_HEADER, - CR_NOOP, - ER_TRANSFER = 32, - ER_COMMAND_COMPLETE, - ER_PORT_STATUS_CHANGE, - ER_BANDWIDTH_REQUEST, - ER_DOORBELL, - ER_HOST_CONTROLLER, - ER_DEVICE_NOTIFICATION, - ER_MFINDEX_WRAP, - /* vendor specific bits */ - CR_VENDOR_NEC_FIRMWARE_REVISION = 49, - 
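Note: splitting the NEC controller out against the new hcd-xhci.h means further vendor flavours no longer have to touch hcd-xhci.c at all. As a hypothetical illustration only (type name and PCI IDs invented, not part of this patch), another variant would follow the same shape as hcd-xhci-nec.c above:

    #include "qemu/osdep.h"
    #include "hw/hw.h"
    #include "hw/usb.h"
    #include "hw/pci/pci.h"

    #include "hcd-xhci.h"

    static void example_xhci_class_init(ObjectClass *klass, void *data)
    {
        PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

        k->vendor_id = PCI_VENDOR_ID_REDHAT;   /* IDs invented for the example */
        k->device_id = 0x0f00;
        k->revision  = 0x01;
    }

    static const TypeInfo example_xhci_info = {
        .name       = "example-usb-xhci",      /* hypothetical type name */
        .parent     = TYPE_XHCI,
        .class_init = example_xhci_class_init,
    };

    static void example_xhci_register_types(void)
    {
        type_register_static(&example_xhci_info);
    }

    type_init(example_xhci_register_types)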
CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, -} TRBType; - #define CR_LINK TR_LINK -typedef enum TRBCCode { - CC_INVALID = 0, - CC_SUCCESS, - CC_DATA_BUFFER_ERROR, - CC_BABBLE_DETECTED, - CC_USB_TRANSACTION_ERROR, - CC_TRB_ERROR, - CC_STALL_ERROR, - CC_RESOURCE_ERROR, - CC_BANDWIDTH_ERROR, - CC_NO_SLOTS_ERROR, - CC_INVALID_STREAM_TYPE_ERROR, - CC_SLOT_NOT_ENABLED_ERROR, - CC_EP_NOT_ENABLED_ERROR, - CC_SHORT_PACKET, - CC_RING_UNDERRUN, - CC_RING_OVERRUN, - CC_VF_ER_FULL, - CC_PARAMETER_ERROR, - CC_BANDWIDTH_OVERRUN, - CC_CONTEXT_STATE_ERROR, - CC_NO_PING_RESPONSE_ERROR, - CC_EVENT_RING_FULL_ERROR, - CC_INCOMPATIBLE_DEVICE_ERROR, - CC_MISSED_SERVICE_ERROR, - CC_COMMAND_RING_STOPPED, - CC_COMMAND_ABORTED, - CC_STOPPED, - CC_STOPPED_LENGTH_INVALID, - CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, - CC_ISOCH_BUFFER_OVERRUN = 31, - CC_EVENT_LOST_ERROR, - CC_UNDEFINED_ERROR, - CC_INVALID_STREAM_ID_ERROR, - CC_SECONDARY_BANDWIDTH_ERROR, - CC_SPLIT_TRANSACTION_ERROR -} TRBCCode; - #define TRB_C (1<<0) #define TRB_TYPE_SHIFT 10 #define TRB_TYPE_MASK 0x3f @@ -301,10 +217,6 @@ typedef enum TRBCCode { #define SLOT_CONTEXT_ENTRIES_MASK 0x1f #define SLOT_CONTEXT_ENTRIES_SHIFT 27 -typedef struct XHCIState XHCIState; -typedef struct XHCIStreamContext XHCIStreamContext; -typedef struct XHCIEPContext XHCIEPContext; - #define get_field(data, field) \ (((data) >> field##_SHIFT) & field##_MASK) @@ -326,21 +238,6 @@ typedef enum EPType { ET_INTR_IN, } EPType; -typedef struct XHCIRing { - dma_addr_t dequeue; - bool ccs; -} XHCIRing; - -typedef struct XHCIPort { - XHCIState *xhci; - uint32_t portsc; - uint32_t portnr; - USBPort *uport; - uint32_t speedmask; - char name[16]; - MemoryRegion mem; -} XHCIPort; - typedef struct XHCITransfer { XHCIEPContext *epctx; USBPacket packet; @@ -402,101 +299,6 @@ struct XHCIEPContext { QEMUTimer *kick_timer; }; -typedef struct XHCISlot { - bool enabled; - bool addressed; - dma_addr_t ctx; - USBPort *uport; - XHCIEPContext * eps[31]; -} XHCISlot; - -typedef struct XHCIEvent { - TRBType type; - TRBCCode ccode; - uint64_t ptr; - uint32_t length; - uint32_t flags; - uint8_t slotid; - uint8_t epid; -} XHCIEvent; - -typedef struct XHCIInterrupter { - uint32_t iman; - uint32_t imod; - uint32_t erstsz; - uint32_t erstba_low; - uint32_t erstba_high; - uint32_t erdp_low; - uint32_t erdp_high; - - bool msix_used, er_pcs; - - dma_addr_t er_start; - uint32_t er_size; - unsigned int er_ep_idx; - - /* kept for live migration compat only */ - bool er_full_unused; - XHCIEvent ev_buffer[EV_QUEUE]; - unsigned int ev_buffer_put; - unsigned int ev_buffer_get; - -} XHCIInterrupter; - -struct XHCIState { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - USBBus bus; - MemoryRegion mem; - MemoryRegion mem_cap; - MemoryRegion mem_oper; - MemoryRegion mem_runtime; - MemoryRegion mem_doorbell; - - /* properties */ - uint32_t numports_2; - uint32_t numports_3; - uint32_t numintrs; - uint32_t numslots; - uint32_t flags; - uint32_t max_pstreams_mask; - OnOffAuto msi; - OnOffAuto msix; - - /* Operational Registers */ - uint32_t usbcmd; - uint32_t usbsts; - uint32_t dnctrl; - uint32_t crcr_low; - uint32_t crcr_high; - uint32_t dcbaap_low; - uint32_t dcbaap_high; - uint32_t config; - - USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)]; - XHCIPort ports[MAXPORTS]; - XHCISlot slots[MAXSLOTS]; - uint32_t numports; - - /* Runtime Registers */ - int64_t mfindex_start; - QEMUTimer *mfwrap_timer; - XHCIInterrupter intr[MAXINTRS]; - - XHCIRing cmd_ring; - - bool nec_quirks; -}; - -#define TYPE_XHCI "base-xhci" -#define 
TYPE_NEC_XHCI "nec-usb-xhci" -#define TYPE_QEMU_XHCI "qemu-xhci" - -#define XHCI(obj) \ - OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) - typedef struct XHCIEvRingSeg { uint32_t addr_low; uint32_t addr_high; @@ -504,12 +306,6 @@ typedef struct XHCIEvRingSeg { uint32_t rsvd; } XHCIEvRingSeg; -enum xhci_flags { - XHCI_FLAG_SS_FIRST = 1, - XHCI_FLAG_FORCE_PCIE_ENDCAP, - XHCI_FLAG_ENABLE_STREAMS, -}; - static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid); static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid); @@ -3843,18 +3639,6 @@ static const VMStateDescription vmstate_xhci = { } }; -static Property nec_xhci_properties[] = { - DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), - DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BIT("superspeed-ports-first", - XHCIState, flags, XHCI_FLAG_SS_FIRST, true), - DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, - XHCI_FLAG_FORCE_PCIE_ENDCAP, false), - DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), - DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), - DEFINE_PROP_END_OF_LIST(), -}; - static Property xhci_properties[] = { DEFINE_PROP_BIT("streams", XHCIState, flags, XHCI_FLAG_ENABLE_STREAMS, true), @@ -3886,23 +3670,6 @@ static const TypeInfo xhci_info = { .abstract = true, }; -static void nec_xhci_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->props = nec_xhci_properties; - k->vendor_id = PCI_VENDOR_ID_NEC; - k->device_id = PCI_DEVICE_ID_NEC_UPD720200; - k->revision = 0x03; -} - -static const TypeInfo nec_xhci_info = { - .name = TYPE_NEC_XHCI, - .parent = TYPE_XHCI, - .class_init = nec_xhci_class_init, -}; - static void qemu_xhci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -3933,7 +3700,6 @@ static const TypeInfo qemu_xhci_info = { static void xhci_register_types(void) { type_register_static(&xhci_info); - type_register_static(&nec_xhci_info); type_register_static(&qemu_xhci_info); } diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h new file mode 100644 index 0000000000..fc36a4c787 --- /dev/null +++ b/hw/usb/hcd-xhci.h @@ -0,0 +1,226 @@ +/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#define TYPE_XHCI "base-xhci" +#define TYPE_NEC_XHCI "nec-usb-xhci" +#define TYPE_QEMU_XHCI "qemu-xhci" + +#define XHCI(obj) \ + OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) + +#define MAXPORTS_2 15 +#define MAXPORTS_3 15 + +#define MAXPORTS (MAXPORTS_2 + MAXPORTS_3) +#define MAXSLOTS 64 +#define MAXINTRS 16 + +/* Very pessimistic, let's hope it's enough for all cases */ +#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) + +typedef struct XHCIState XHCIState; +typedef struct XHCIStreamContext XHCIStreamContext; +typedef struct XHCIEPContext XHCIEPContext; + +enum xhci_flags { + XHCI_FLAG_SS_FIRST = 1, + XHCI_FLAG_FORCE_PCIE_ENDCAP, + XHCI_FLAG_ENABLE_STREAMS, +}; + +typedef enum TRBType { + TRB_RESERVED = 0, + TR_NORMAL, + TR_SETUP, + TR_DATA, + TR_STATUS, + TR_ISOCH, + TR_LINK, + TR_EVDATA, + TR_NOOP, + CR_ENABLE_SLOT, + CR_DISABLE_SLOT, + CR_ADDRESS_DEVICE, + CR_CONFIGURE_ENDPOINT, + CR_EVALUATE_CONTEXT, + CR_RESET_ENDPOINT, + CR_STOP_ENDPOINT, + CR_SET_TR_DEQUEUE, + CR_RESET_DEVICE, + CR_FORCE_EVENT, + CR_NEGOTIATE_BW, + CR_SET_LATENCY_TOLERANCE, + CR_GET_PORT_BANDWIDTH, + CR_FORCE_HEADER, + CR_NOOP, + ER_TRANSFER = 32, + ER_COMMAND_COMPLETE, + ER_PORT_STATUS_CHANGE, + ER_BANDWIDTH_REQUEST, + ER_DOORBELL, + ER_HOST_CONTROLLER, + ER_DEVICE_NOTIFICATION, + ER_MFINDEX_WRAP, + /* vendor specific bits */ + CR_VENDOR_NEC_FIRMWARE_REVISION = 49, + CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, +} TRBType; + +typedef enum TRBCCode { + CC_INVALID = 0, + CC_SUCCESS, + CC_DATA_BUFFER_ERROR, + CC_BABBLE_DETECTED, + CC_USB_TRANSACTION_ERROR, + CC_TRB_ERROR, + CC_STALL_ERROR, + CC_RESOURCE_ERROR, + CC_BANDWIDTH_ERROR, + CC_NO_SLOTS_ERROR, + CC_INVALID_STREAM_TYPE_ERROR, + CC_SLOT_NOT_ENABLED_ERROR, + CC_EP_NOT_ENABLED_ERROR, + CC_SHORT_PACKET, + CC_RING_UNDERRUN, + CC_RING_OVERRUN, + CC_VF_ER_FULL, + CC_PARAMETER_ERROR, + CC_BANDWIDTH_OVERRUN, + CC_CONTEXT_STATE_ERROR, + CC_NO_PING_RESPONSE_ERROR, + CC_EVENT_RING_FULL_ERROR, + CC_INCOMPATIBLE_DEVICE_ERROR, + CC_MISSED_SERVICE_ERROR, + CC_COMMAND_RING_STOPPED, + CC_COMMAND_ABORTED, + CC_STOPPED, + CC_STOPPED_LENGTH_INVALID, + CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, + CC_ISOCH_BUFFER_OVERRUN = 31, + CC_EVENT_LOST_ERROR, + CC_UNDEFINED_ERROR, + CC_INVALID_STREAM_ID_ERROR, + CC_SECONDARY_BANDWIDTH_ERROR, + CC_SPLIT_TRANSACTION_ERROR +} TRBCCode; + +typedef struct XHCIRing { + dma_addr_t dequeue; + bool ccs; +} XHCIRing; + +typedef struct XHCIPort { + XHCIState *xhci; + uint32_t portsc; + uint32_t portnr; + USBPort *uport; + uint32_t speedmask; + char name[16]; + MemoryRegion mem; +} XHCIPort; + +typedef struct XHCISlot { + bool enabled; + bool addressed; + dma_addr_t ctx; + USBPort *uport; + XHCIEPContext *eps[31]; +} XHCISlot; + +typedef struct XHCIEvent { + TRBType type; + TRBCCode ccode; + uint64_t ptr; + uint32_t length; + uint32_t flags; + uint8_t slotid; + uint8_t epid; +} XHCIEvent; + +typedef struct XHCIInterrupter { + uint32_t iman; + uint32_t imod; + uint32_t erstsz; + uint32_t erstba_low; + uint32_t erstba_high; + uint32_t erdp_low; + uint32_t erdp_high; + + bool msix_used, er_pcs; + + dma_addr_t er_start; + uint32_t er_size; + unsigned int er_ep_idx; + + /* kept for live migration compat only */ + bool er_full_unused; + XHCIEvent ev_buffer[EV_QUEUE]; + unsigned int ev_buffer_put; + unsigned int ev_buffer_get; + +} XHCIInterrupter; + +struct XHCIState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + USBBus bus; + MemoryRegion mem; + MemoryRegion mem_cap; + MemoryRegion mem_oper; + MemoryRegion mem_runtime; + MemoryRegion 
mem_doorbell;
+
+    /* properties */
+    uint32_t numports_2;
+    uint32_t numports_3;
+    uint32_t numintrs;
+    uint32_t numslots;
+    uint32_t flags;
+    uint32_t max_pstreams_mask;
+    OnOffAuto msi;
+    OnOffAuto msix;
+
+    /* Operational Registers */
+    uint32_t usbcmd;
+    uint32_t usbsts;
+    uint32_t dnctrl;
+    uint32_t crcr_low;
+    uint32_t crcr_high;
+    uint32_t dcbaap_low;
+    uint32_t dcbaap_high;
+    uint32_t config;
+
+    USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)];
+    XHCIPort ports[MAXPORTS];
+    XHCISlot slots[MAXSLOTS];
+    uint32_t numports;
+
+    /* Runtime Registers */
+    int64_t mfindex_start;
+    QEMUTimer *mfwrap_timer;
+    XHCIInterrupter intr[MAXINTRS];
+
+    XHCIRing cmd_ring;
+
+    bool nec_quirks;
+};
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index 05e7fbb93f..c3ab9097f1 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,6 +1,7 @@
 ifeq ($(CONFIG_LINUX), y)
 obj-$(CONFIG_SOFTMMU) += common.o
 obj-$(CONFIG_PCI) += pci.o pci-quirks.o
+obj-$(CONFIG_VFIO_CCW) += ccw.o
 obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
 obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
new file mode 100644
index 0000000000..12d0262336
--- /dev/null
+++ b/hw/vfio/ccw.c
@@ -0,0 +1,434 @@
+/*
+ * vfio based subchannel assignment support
+ *
+ * Copyright 2017 IBM Corp.
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <linux/vfio.h>
+#include <linux/vfio_ccw.h>
+#include <sys/ioctl.h>
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/s390-ccw.h"
+#include "hw/s390x/ccw-device.h"
+#include "qemu/error-report.h"
+
+#define TYPE_VFIO_CCW "vfio-ccw"
+typedef struct VFIOCCWDevice {
+    S390CCWDevice cdev;
+    VFIODevice vdev;
+    uint64_t io_region_size;
+    uint64_t io_region_offset;
+    struct ccw_io_region *io_region;
+    EventNotifier io_notifier;
+} VFIOCCWDevice;
+
+static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
+{
+    vdev->needs_reset = false;
+}
+
+/*
+ * We don't need vfio_hot_reset_multi and vfio_eoi operations for
+ * vfio_ccw device now.
+ */
+struct VFIODeviceOps vfio_ccw_ops = {
+    .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
+};
+
+static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data)
+{
+    S390CCWDevice *cdev = data;
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    struct ccw_io_region *region = vcdev->io_region;
+    int ret;
+
+    QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
+    QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
+    QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
+
+    memset(region, 0, sizeof(*region));
+
+    memcpy(region->orb_area, orb, sizeof(ORB));
+    memcpy(region->scsw_area, scsw, sizeof(SCSW));
+
+again:
+    ret = pwrite(vcdev->vdev.fd, region,
+                 vcdev->io_region_size, vcdev->io_region_offset);
+    if (ret != vcdev->io_region_size) {
+        if (errno == EAGAIN) {
+            goto again;
+        }
+        error_report("vfio-ccw: write I/O region failed with errno=%d", errno);
+        return -errno;
+    }
+
+    return region->ret_code;
+}
+
+static void vfio_ccw_reset(DeviceState *dev)
+{
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+
+    ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
+}
+
+static void vfio_ccw_io_notifier_handler(void *opaque)
+{
+    VFIOCCWDevice *vcdev = opaque;
+    struct ccw_io_region *region = vcdev->io_region;
+    S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
+    CcwDevice *ccw_dev = CCW_DEVICE(cdev);
+    SubchDev *sch = ccw_dev->sch;
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    IRB irb;
+    int size;
+
+    if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
+        return;
+    }
+
+    size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
+                 vcdev->io_region_offset);
+    if (size == -1) {
+        switch (errno) {
+        case ENODEV:
+            /* Generate a deferred cc 3 condition. */
+            s->flags |= SCSW_FLAGS_MASK_CC;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
+            goto read_err;
+        case EFAULT:
+            /* Memory problem, generate channel data check. */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_DATA_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                       SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            goto read_err;
+        default:
+            /* Error, generate channel program check. */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_PROG_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                       SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            goto read_err;
+        }
+    } else if (size != vcdev->io_region_size) {
+        /* Information transfer error, generate channel-control check. */
+        s->ctrl &= ~SCSW_ACTL_START_PEND;
+        s->cstat = SCSW_CSTAT_CHN_CTRL_CHK;
+        s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+        s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                   SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+        goto read_err;
+    }
+
+    memcpy(&irb, region->irb_area, sizeof(IRB));
+
+    /* Update control block via irb. */
+    copy_scsw_to_guest(s, &irb.scsw);
+
+    /* If a unit check is pending, copy sense data.
 */
+    if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
+        (p->chars & PMCW_CHARS_MASK_CSENSE)) {
+        memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
+    }
+
+read_err:
+    css_inject_io_interrupt(sch);
+}
+
+static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
+{
+    VFIODevice *vdev = &vcdev->vdev;
+    struct vfio_irq_info *irq_info;
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
+        error_setg(errp, "vfio: unexpected number of io irqs %u",
+                   vdev->num_irqs);
+        return;
+    }
+
+    argsz = sizeof(*irq_info);
+    irq_info = g_malloc0(argsz);
+    irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_info->argsz = argsz;
+    if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
+              irq_info) < 0 || irq_info->count < 1) {
+        error_setg_errno(errp, errno, "vfio: Error getting irq info");
+        goto out_free_info;
+    }
+
+    if (event_notifier_init(&vcdev->io_notifier, 0)) {
+        error_setg_errno(errp, errno,
+                         "vfio: Unable to init event notifier for IO");
+        goto out_free_info;
+    }
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *) &irq_set->data;
+
+    *pfd = event_notifier_get_fd(&vcdev->io_notifier);
+    qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev);
+    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+        error_setg(errp, "vfio: Failed to set up io notification");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vcdev);
+        event_notifier_cleanup(&vcdev->io_notifier);
+    }
+
+    g_free(irq_set);
+
+out_free_info:
+    g_free(irq_info);
+}
+
+static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
+{
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *) &irq_set->data;
+    *pfd = -1;
+
+    if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+        error_report("vfio: Failed to de-assign device io fd: %m");
+    }
+
+    qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
+                        NULL, NULL, vcdev);
+    event_notifier_cleanup(&vcdev->io_notifier);
+
+    g_free(irq_set);
+}
+
+static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
+{
+    VFIODevice *vdev = &vcdev->vdev;
+    struct vfio_region_info *info;
+    int ret;
+
+    /* Sanity check device */
+    if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
+        error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
+        return;
+    }
+
+    if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
+        error_setg(errp, "vfio: Unexpected number of the I/O region %u",
+                   vdev->num_regions);
+        return;
+    }
+
+    ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
+    if (ret) {
+        error_setg_errno(errp, -ret, "vfio: Error getting config info");
+        return;
+    }
+
+    vcdev->io_region_size = info->size;
+    if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
+        error_setg(errp, "vfio: Unexpected size of the I/O region");
+        g_free(info);
+        return;
+    }
+
+    vcdev->io_region_offset = info->offset;
+    vcdev->io_region = g_malloc0(info->size);
+
+    g_free(info);
+}
+
+static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
+{
+    g_free(vcdev->io_region);
+}
+
+static void
vfio_put_device(VFIOCCWDevice *vcdev) +{ + g_free(vcdev->vdev.name); + vfio_put_base_device(&vcdev->vdev); +} + +static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) +{ + char *tmp, group_path[PATH_MAX]; + ssize_t len; + int groupid; + + tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", + cdev->hostid.cssid, cdev->hostid.ssid, + cdev->hostid.devid, cdev->mdevid); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len <= 0 || len >= sizeof(group_path)) { + error_setg(errp, "vfio: no iommu_group found"); + return NULL; + } + + group_path[len] = 0; + + if (sscanf(basename(group_path), "%d", &groupid) != 1) { + error_setg(errp, "vfio: failed to read %s", group_path); + return NULL; + } + + return vfio_get_group(groupid, &address_space_memory, errp); +} + +static void vfio_ccw_realize(DeviceState *dev, Error **errp) +{ + VFIODevice *vbasedev; + VFIOGroup *group; + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + Error *err = NULL; + + /* Call the class init function for subchannel. */ + if (cdc->realize) { + cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); + if (err) { + goto out_err_propagate; + } + } + + group = vfio_ccw_get_group(cdev, &err); + if (!group) { + goto out_group_err; + } + + vcdev->vdev.ops = &vfio_ccw_ops; + vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; + vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid, + cdev->hostid.ssid, cdev->hostid.devid); + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) { + error_setg(&err, "vfio: subchannel %s has already been attached", + vcdev->vdev.name); + goto out_device_err; + } + } + + if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) { + goto out_device_err; + } + + vfio_ccw_get_region(vcdev, &err); + if (err) { + goto out_region_err; + } + + vfio_ccw_register_io_notifier(vcdev, &err); + if (err) { + goto out_notifier_err; + } + + return; + +out_notifier_err: + vfio_ccw_put_region(vcdev); +out_region_err: + vfio_put_device(vcdev); +out_device_err: + vfio_put_group(group); +out_group_err: + if (cdc->unrealize) { + cdc->unrealize(cdev, NULL); + } +out_err_propagate: + error_propagate(errp, err); +} + +static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) +{ + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIOGroup *group = vcdev->vdev.group; + + vfio_ccw_unregister_io_notifier(vcdev); + vfio_ccw_put_region(vcdev); + vfio_put_device(vcdev); + vfio_put_group(group); + + if (cdc->unrealize) { + cdc->unrealize(cdev, errp); + } +} + +static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vfio_ccw_vmstate = { + .name = TYPE_VFIO_CCW, + .unmigratable = 1, +}; + +static void vfio_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->props = vfio_ccw_properties; + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + dc->realize = 
vfio_ccw_realize; + dc->unrealize = vfio_ccw_unrealize; + dc->reset = vfio_ccw_reset; + + cdc->handle_request = vfio_ccw_handle_request; +} + +static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), + .class_init = vfio_ccw_class_init, +}; + +static void register_vfio_ccw_type(void) +{ + type_register_static(&vfio_ccw_info); +} + +type_init(register_vfio_ccw_type) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a8f12eeb35..b9abe77f5a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -502,7 +502,7 @@ static void vfio_listener_region_add(MemoryListener *listener, QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); - memory_region_iommu_replay(giommu->iommu, &giommu->n, false); + memory_region_iommu_replay(giommu->iommu, &giommu->n); return; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b87a176770..dde094abb4 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -162,11 +162,11 @@ fail: } static int process_message_reply(struct vhost_dev *dev, - VhostUserMsg msg) + const VhostUserMsg *msg) { VhostUserMsg msg_reply; - if ((msg.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } @@ -174,10 +174,10 @@ static int process_message_reply(struct vhost_dev *dev, return -1; } - if (msg_reply.request != msg.request) { + if (msg_reply.request != msg->request) { error_report("Received unexpected msg type." "Expected %d received %d", - msg.request, msg_reply.request); + msg->request, msg_reply.request); return -1; } @@ -324,7 +324,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, } if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; @@ -716,7 +716,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) /* If reply_ack supported, slave has to ack specified MTU is valid */ if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 2aeaf1fbc9..0c5c9cde1c 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -110,7 +110,7 @@ void watchdog_perform_action(void) switch (watchdog_action) { case WDT_RESET: /* same as 'system_reset' in monitor */ qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case WDT_SHUTDOWN: /* same as 'system_powerdown' in monitor */ diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c index 457a8976c3..c89ced2e88 100644 --- a/hw/xenpv/xen_domainbuild.c +++ b/hw/xenpv/xen_domainbuild.c @@ -148,7 +148,7 @@ static void xen_domain_poll(void *opaque) return; quit: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static int xen_domain_watcher(void) diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index 11176e26bd..4636f8e934 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -100,7 +100,7 @@ static void lx60_fpga_write(void *opaque, hwaddr addr, case 0x10: /*board reset*/ if (val == 0xdead) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } |
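
Note: the qemu_system_reset_request()/qemu_system_shutdown_request() hunks throughout this series all follow one pattern: a guest write to a board, watchdog, or power-management register now reports an explicit ShutdownCause instead of calling the request function with no argument. The sketch below is not part of the diff; it shows how a hypothetical board-control MMIO block would use the reworked API. The demo_* names, register offsets, and magic value are invented for illustration; only the two request functions and the SHUTDOWN_CAUSE_* values come from the tree.

/* Hypothetical example only -- not part of the series above. */
#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"   /* qemu_system_*_request(), ShutdownCause */

#define DEMO_REG_RESET   0x00   /* invented register layout */
#define DEMO_REG_POWER   0x04
#define DEMO_RESET_MAGIC 0xdead

static void demo_boardctl_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
{
    switch (addr) {
    case DEMO_REG_RESET:
        if (val == DEMO_RESET_MAGIC) {
            /* Guest asked for a reboot: tag it as a guest-initiated reset. */
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        }
        break;
    case DEMO_REG_POWER:
        /* Guest asked to power off: report it as a guest shutdown. */
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;
    default:
        break;
    }
}

static const MemoryRegionOps demo_boardctl_ops = {
    .write = demo_boardctl_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

Carrying the cause through the request is what lets management code distinguish guest-initiated resets and shutdowns from host-initiated ones.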