From ca27b5eb7cdd112ed465bd757358af4c06e135ea Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 4 May 2020 13:56:54 +0200 Subject: qom/object: Move Object typedef to 'qemu/typedefs.h' We use the Object type all over the place. Forward declare it in "qemu/typedefs.h". Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200504115656.6045-2-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/hw/display/edid.h | 3 --- include/qemu/typedefs.h | 1 + include/qom/object.h | 2 -- include/qom/qom-qobject.h | 2 -- include/sysemu/sysemu.h | 1 - 5 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h index 23371ee82c..5b1de57f24 100644 --- a/include/hw/display/edid.h +++ b/include/hw/display/edid.h @@ -1,9 +1,6 @@ #ifndef EDID_H #define EDID_H -#include "qom/object.h" -#include "hw/qdev-properties.h" - typedef struct qemu_edid_info { const char *vendor; /* http://www.uefi.org/pnp_id_list */ const char *name; diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index ecf3cde26c..de68d51d52 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -76,6 +76,7 @@ typedef struct NetFilterState NetFilterState; typedef struct NICInfo NICInfo; typedef struct NodeInfo NodeInfo; typedef struct NumaNodeMem NumaNodeMem; +typedef struct Object Object; typedef struct ObjectClass ObjectClass; typedef struct PCIBridge PCIBridge; typedef struct PCIBus PCIBus; diff --git a/include/qom/object.h b/include/qom/object.h index fd453dc8d6..c7c97ead60 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -20,8 +20,6 @@ struct TypeImpl; typedef struct TypeImpl *Type; -typedef struct Object Object; - typedef struct TypeInfo TypeInfo; typedef struct InterfaceClass InterfaceClass; diff --git a/include/qom/qom-qobject.h b/include/qom/qom-qobject.h index 77cd717e3f..82136e6e80 100644 --- a/include/qom/qom-qobject.h +++ b/include/qom/qom-qobject.h @@ -13,8 +13,6 @@ #ifndef QEMU_QOM_QOBJECT_H #define QEMU_QOM_QOBJECT_H -#include "qom/object.h" - /* * object_property_get_qobject: * @obj: the object diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 3efccdba7e..4b6a5c459c 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -5,7 +5,6 @@ #include "qemu/timer.h" #include "qemu/notify.h" #include "qemu/uuid.h" -#include "qom/object.h" /* vl.c */ -- cgit v1.2.3-55-g7522 From 78f8d4975c5d035e2e2447e6e499629b96142db0 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 4 May 2020 13:56:55 +0200 Subject: io/task: Move 'qom/object.h' header to source We need "qom/object.h" to call object_ref()/object_unref(), and to test the TYPE_DUMMY. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200504115656.6045-3-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/io/task.h | 2 -- io/task.c | 1 + tests/test-io-task.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/io/task.h b/include/io/task.h index 1abbfb8b65..6818dfedd0 100644 --- a/include/io/task.h +++ b/include/io/task.h @@ -21,8 +21,6 @@ #ifndef QIO_TASK_H #define QIO_TASK_H -#include "qom/object.h" - typedef struct QIOTask QIOTask; typedef void (*QIOTaskFunc)(QIOTask *task, diff --git a/io/task.c b/io/task.c index 1ae7b86488..53c0bed686 100644 --- a/io/task.c +++ b/io/task.c @@ -22,6 +22,7 @@ #include "io/task.h" #include "qapi/error.h" #include "qemu/thread.h" +#include "qom/object.h" #include "trace.h" struct QIOTaskThreadData { diff --git a/tests/test-io-task.c b/tests/test-io-task.c index aa8b653bfa..c8a3813d49 100644 --- a/tests/test-io-task.c +++ b/tests/test-io-task.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" +#include "qom/object.h" #include "io/task.h" #include "qapi/error.h" #include "qemu/module.h" -- cgit v1.2.3-55-g7522 From d42cd96100724bc9fbac18cc17c5f5f9b9b2f181 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 15:34:39 +0300 Subject: hyperv: expose API to determine if synic is enabled Signed-off-by: Jon Doron Message-Id: <20200424123444.3481728-2-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 8 ++++++++ include/hw/hyperv/hyperv.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 4b11f7a76b..a3933c34c6 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -38,6 +38,13 @@ typedef struct SynICState { #define TYPE_SYNIC "hyperv-synic" #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) +static bool synic_enabled; + +bool hyperv_is_synic_enabled(void) +{ + return synic_enabled; +} + static SynICState *get_synic(CPUState *cs) { return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); @@ -134,6 +141,7 @@ void hyperv_synic_add(CPUState *cs) object_property_add_child(OBJECT(cs), "synic", obj); object_unref(obj); object_property_set_bool(obj, true, "realized", &error_abort); + synic_enabled = true; } void hyperv_synic_reset(CPUState *cs) diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 597381cb01..a63ee0003c 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -79,5 +79,6 @@ void hyperv_synic_add(CPUState *cs); void hyperv_synic_reset(CPUState *cs); void hyperv_synic_update(CPUState *cs, bool enable, hwaddr msg_page_addr, hwaddr event_page_addr); +bool hyperv_is_synic_enabled(void); #endif -- cgit v1.2.3-55-g7522 From 973b1fbd862c848dde7f710ba1b9ca340235e75f Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 15:34:40 +0300 Subject: vmbus: add vmbus protocol definitions Add a header with data structures and constants used in Hyper-V VMBus hypervisor <-> guest interactions. Based on the respective stuff from Linux kernel. Signed-off-by: Roman Kagan Signed-off-by: Maciej S. Szmigiero Signed-off-by: Jon Doron Message-Id: <20200424123444.3481728-3-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/vmbus-proto.h | 222 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 include/hw/hyperv/vmbus-proto.h (limited to 'include') diff --git a/include/hw/hyperv/vmbus-proto.h b/include/hw/hyperv/vmbus-proto.h new file mode 100644 index 0000000000..4628d3b323 --- /dev/null +++ b/include/hw/hyperv/vmbus-proto.h @@ -0,0 +1,222 @@ +/* + * QEMU Hyper-V VMBus support + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_PROTO_H +#define HW_HYPERV_VMBUS_PROTO_H + +#define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) +#define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) +#define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) +#define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) +#define VMBUS_VERSION_WIN10 ((4 << 16) | (0)) +#define VMBUS_VERSION_INVAL -1 +#define VMBUS_VERSION_CURRENT VMBUS_VERSION_WIN10 + +#define VMBUS_MESSAGE_CONNECTION_ID 1 +#define VMBUS_EVENT_CONNECTION_ID 2 +#define VMBUS_MONITOR_CONNECTION_ID 3 +#define VMBUS_SINT 2 + +#define VMBUS_MSG_INVALID 0 +#define VMBUS_MSG_OFFERCHANNEL 1 +#define VMBUS_MSG_RESCIND_CHANNELOFFER 2 +#define VMBUS_MSG_REQUESTOFFERS 3 +#define VMBUS_MSG_ALLOFFERS_DELIVERED 4 +#define VMBUS_MSG_OPENCHANNEL 5 +#define VMBUS_MSG_OPENCHANNEL_RESULT 6 +#define VMBUS_MSG_CLOSECHANNEL 7 +#define VMBUS_MSG_GPADL_HEADER 8 +#define VMBUS_MSG_GPADL_BODY 9 +#define VMBUS_MSG_GPADL_CREATED 10 +#define VMBUS_MSG_GPADL_TEARDOWN 11 +#define VMBUS_MSG_GPADL_TORNDOWN 12 +#define VMBUS_MSG_RELID_RELEASED 13 +#define VMBUS_MSG_INITIATE_CONTACT 14 +#define VMBUS_MSG_VERSION_RESPONSE 15 +#define VMBUS_MSG_UNLOAD 16 +#define VMBUS_MSG_UNLOAD_RESPONSE 17 +#define VMBUS_MSG_COUNT 18 + +#define VMBUS_MESSAGE_SIZE_ALIGN sizeof(uint64_t) + +#define VMBUS_PACKET_INVALID 0x0 +#define VMBUS_PACKET_SYNCH 0x1 +#define VMBUS_PACKET_ADD_XFER_PAGESET 0x2 +#define VMBUS_PACKET_RM_XFER_PAGESET 0x3 +#define VMBUS_PACKET_ESTABLISH_GPADL 0x4 +#define VMBUS_PACKET_TEARDOWN_GPADL 0x5 +#define VMBUS_PACKET_DATA_INBAND 0x6 +#define VMBUS_PACKET_DATA_USING_XFER_PAGES 0x7 +#define VMBUS_PACKET_DATA_USING_GPADL 0x8 +#define VMBUS_PACKET_DATA_USING_GPA_DIRECT 0x9 +#define VMBUS_PACKET_CANCEL_REQUEST 0xa +#define VMBUS_PACKET_COMP 0xb +#define VMBUS_PACKET_DATA_USING_ADDITIONAL_PKT 0xc +#define VMBUS_PACKET_ADDITIONAL_DATA 0xd + +#define VMBUS_CHANNEL_USER_DATA_SIZE 120 + +#define VMBUS_OFFER_MONITOR_ALLOCATED 0x1 +#define VMBUS_OFFER_INTERRUPT_DEDICATED 0x1 + +#define VMBUS_RING_BUFFER_FEAT_PENDING_SZ (1ul << 0) + +#define VMBUS_CHANNEL_ENUMERATE_DEVICE_INTERFACE 0x1 +#define VMBUS_CHANNEL_SERVER_SUPPORTS_TRANSFER_PAGES 0x2 +#define VMBUS_CHANNEL_SERVER_SUPPORTS_GPADLS 0x4 +#define VMBUS_CHANNEL_NAMED_PIPE_MODE 0x10 +#define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 +#define VMBUS_CHANNEL_PARENT_OFFER 0x200 +#define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 +#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 + +#define VMBUS_PACKET_FLAG_REQUEST_COMPLETION 1 + +typedef struct vmbus_message_header { + uint32_t message_type; + uint32_t _padding; +} vmbus_message_header; + +typedef struct vmbus_message_initiate_contact { + vmbus_message_header header; + uint32_t version_requested; + uint32_t target_vcpu; + uint64_t interrupt_page; + uint64_t monitor_page1; + uint64_t monitor_page2; +} vmbus_message_initiate_contact; + +typedef struct vmbus_message_version_response { + vmbus_message_header header; + uint8_t version_supported; + uint8_t status; +} vmbus_message_version_response; + +typedef struct vmbus_message_offer_channel { + vmbus_message_header header; + uint8_t type_uuid[16]; + uint8_t instance_uuid[16]; + uint64_t _reserved1; + uint64_t _reserved2; + uint16_t channel_flags; + uint16_t mmio_size_mb; + uint8_t user_data[VMBUS_CHANNEL_USER_DATA_SIZE]; + uint16_t sub_channel_index; + uint16_t _reserved3; + uint32_t child_relid; + uint8_t monitor_id; + uint8_t monitor_flags; + uint16_t interrupt_flags; + uint32_t connection_id; +} vmbus_message_offer_channel; + +typedef struct vmbus_message_rescind_channel_offer { + vmbus_message_header header; + uint32_t child_relid; +} vmbus_message_rescind_channel_offer; + +typedef struct vmbus_gpa_range { + uint32_t byte_count; + uint32_t byte_offset; + uint64_t pfn_array[]; +} vmbus_gpa_range; + +typedef struct vmbus_message_gpadl_header { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; + uint16_t range_buflen; + uint16_t rangecount; + vmbus_gpa_range range[]; +} QEMU_PACKED vmbus_message_gpadl_header; + +typedef struct vmbus_message_gpadl_body { + vmbus_message_header header; + uint32_t message_number; + uint32_t gpadl_id; + uint64_t pfn_array[]; +} vmbus_message_gpadl_body; + +typedef struct vmbus_message_gpadl_created { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; + uint32_t status; +} vmbus_message_gpadl_created; + +typedef struct vmbus_message_gpadl_teardown { + vmbus_message_header header; + uint32_t child_relid; + uint32_t gpadl_id; +} vmbus_message_gpadl_teardown; + +typedef struct vmbus_message_gpadl_torndown { + vmbus_message_header header; + uint32_t gpadl_id; +} vmbus_message_gpadl_torndown; + +typedef struct vmbus_message_open_channel { + vmbus_message_header header; + uint32_t child_relid; + uint32_t open_id; + uint32_t ring_buffer_gpadl_id; + uint32_t target_vp; + uint32_t ring_buffer_offset; + uint8_t user_data[VMBUS_CHANNEL_USER_DATA_SIZE]; +} vmbus_message_open_channel; + +typedef struct vmbus_message_open_result { + vmbus_message_header header; + uint32_t child_relid; + uint32_t open_id; + uint32_t status; +} vmbus_message_open_result; + +typedef struct vmbus_message_close_channel { + vmbus_message_header header; + uint32_t child_relid; +} vmbus_message_close_channel; + +typedef struct vmbus_ring_buffer { + uint32_t write_index; + uint32_t read_index; + uint32_t interrupt_mask; + uint32_t pending_send_sz; + uint32_t _reserved1[12]; + uint32_t feature_bits; +} vmbus_ring_buffer; + +typedef struct vmbus_packet_hdr { + uint16_t type; + uint16_t offset_qwords; + uint16_t len_qwords; + uint16_t flags; + uint64_t transaction_id; +} vmbus_packet_hdr; + +typedef struct vmbus_pkt_gpa_direct { + uint32_t _reserved; + uint32_t rangecount; + vmbus_gpa_range range[]; +} vmbus_pkt_gpa_direct; + +typedef struct vmbus_xferpg_range { + uint32_t byte_count; + uint32_t byte_offset; +} vmbus_xferpg_range; + +typedef struct vmbus_pkt_xferpg { + uint16_t buffer_id; + uint8_t sender_owns_set; + uint8_t _reserved; + uint32_t rangecount; + vmbus_xferpg_range range[]; +} vmbus_pkt_xferpg; + +#endif -- cgit v1.2.3-55-g7522 From 0d71f7082d7aeec8d9767e32dbf7dd86b94b8260 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 15:34:41 +0300 Subject: vmbus: vmbus implementation Add the VMBus infrastructure -- bus, devices, root bridge, vmbus state machine, vmbus channel interactions, etc. VMBus is a collection of technologies. At its lowest layer, it's a message passing and signaling mechanism, allowing efficient passing of messages to and from guest VMs. A layer higher, it's a mechanism for defining channels of communication, where each channel is tagged with a type (which implies a protocol) and a instance ID. A layer higher than that, it's a bus driver, serving as the basis of device enumeration within a VM, where a channel can optionally be exposed as a paravirtual device. When a server-side (paravirtual back-end) component wishes to offer a channel to a guest VM, it does so by specifying a channel type, a mode, and an instance ID. VMBus then exposes this in the guest. More information about VMBus can be found in the file vmbuskernelmodeclientlibapi.h in Microsoft's WDK. TODO: - split into smaller palatable pieces - more comments - check and handle corner cases Kudos to Evgeny Yakovlev (formerly eyakovlev@virtuozzo.com) and Andrey Smetatin (formerly asmetanin@virtuozzo.com) for research and prototyping. Signed-off-by: Roman Kagan Signed-off-by: Maciej S. Szmigiero Signed-off-by: Jon Doron Message-Id: <20200424123444.3481728-4-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- Makefile.objs | 1 + hw/hyperv/Kconfig | 5 + hw/hyperv/Makefile.objs | 1 + hw/hyperv/trace-events | 18 + hw/hyperv/vmbus.c | 2672 ++++++++++++++++++++++++++++++++++++++ include/hw/hyperv/vmbus-bridge.h | 32 + include/hw/hyperv/vmbus.h | 227 ++++ 7 files changed, 2956 insertions(+) create mode 100644 hw/hyperv/trace-events create mode 100644 hw/hyperv/vmbus.c create mode 100644 include/hw/hyperv/vmbus-bridge.h create mode 100644 include/hw/hyperv/vmbus.h (limited to 'include') diff --git a/Makefile.objs b/Makefile.objs index 99774cfd25..c09d95dfe3 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -150,6 +150,7 @@ trace-events-subdirs += hw/block/dataplane trace-events-subdirs += hw/char trace-events-subdirs += hw/dma trace-events-subdirs += hw/hppa +trace-events-subdirs += hw/hyperv trace-events-subdirs += hw/i2c trace-events-subdirs += hw/i386 trace-events-subdirs += hw/i386/xen diff --git a/hw/hyperv/Kconfig b/hw/hyperv/Kconfig index a1fa8ff9be..3fbfe41c9e 100644 --- a/hw/hyperv/Kconfig +++ b/hw/hyperv/Kconfig @@ -6,3 +6,8 @@ config HYPERV_TESTDEV bool default y if TEST_DEVICES depends on HYPERV + +config VMBUS + bool + default y + depends on HYPERV diff --git a/hw/hyperv/Makefile.objs b/hw/hyperv/Makefile.objs index edaca2f763..5b614e040c 100644 --- a/hw/hyperv/Makefile.objs +++ b/hw/hyperv/Makefile.objs @@ -1,2 +1,3 @@ obj-y += hyperv.o obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o +obj-$(CONFIG_VMBUS) += vmbus.o diff --git a/hw/hyperv/trace-events b/hw/hyperv/trace-events new file mode 100644 index 0000000000..ba5bd62d61 --- /dev/null +++ b/hw/hyperv/trace-events @@ -0,0 +1,18 @@ +# vmbus +vmbus_recv_message(uint32_t type, uint32_t size) "type %d size %d" +vmbus_signal_event(void) "" +vmbus_channel_notify_guest(uint32_t chan_id) "channel #%d" +vmbus_post_msg(uint32_t type, uint32_t size) "type %d size %d" +vmbus_msg_cb(int status) "message status %d" +vmbus_process_incoming_message(uint32_t message_type) "type %d" +vmbus_initiate_contact(uint16_t major, uint16_t minor, uint32_t vcpu, uint64_t monitor_page1, uint64_t monitor_page2, uint64_t interrupt_page) "version %d.%d target vp %d mon pages 0x%"PRIx64",0x%"PRIx64" int page 0x%"PRIx64 +vmbus_send_offer(uint32_t chan_id, void *dev) "channel #%d dev %p" +vmbus_terminate_offers(void) "" +vmbus_gpadl_header(uint32_t gpadl_id, uint16_t num_gfns) "gpadl #%d gfns %d" +vmbus_gpadl_body(uint32_t gpadl_id) "gpadl #%d" +vmbus_gpadl_created(uint32_t gpadl_id) "gpadl #%d" +vmbus_gpadl_teardown(uint32_t gpadl_id) "gpadl #%d" +vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d" +vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d" +vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d" +vmbus_close_channel(uint32_t chan_id) "channel #%d" diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c new file mode 100644 index 0000000000..802bbc8c96 --- /dev/null +++ b/hw/hyperv/vmbus.c @@ -0,0 +1,2672 @@ +/* + * QEMU Hyper-V VMBus + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/hyperv/hyperv.h" +#include "hw/hyperv/vmbus.h" +#include "hw/hyperv/vmbus-bridge.h" +#include "hw/sysbus.h" +#include "cpu.h" +#include "trace.h" + +#define TYPE_VMBUS "vmbus" +#define VMBUS(obj) OBJECT_CHECK(VMBus, (obj), TYPE_VMBUS) + +enum { + VMGPADL_INIT, + VMGPADL_ALIVE, + VMGPADL_TEARINGDOWN, + VMGPADL_TORNDOWN, +}; + +struct VMBusGpadl { + /* GPADL id */ + uint32_t id; + /* associated channel id (rudimentary?) */ + uint32_t child_relid; + + /* number of pages in the GPADL as declared in GPADL_HEADER message */ + uint32_t num_gfns; + /* + * Due to limited message size, GPADL may not fit fully in a single + * GPADL_HEADER message, and is further popluated using GPADL_BODY + * messages. @seen_gfns is the number of pages seen so far; once it + * reaches @num_gfns, the GPADL is ready to use. + */ + uint32_t seen_gfns; + /* array of GFNs (of size @num_gfns once allocated) */ + uint64_t *gfns; + + uint8_t state; + + QTAILQ_ENTRY(VMBusGpadl) link; + VMBus *vmbus; + unsigned refcount; +}; + +/* + * Wrap sequential read from / write to GPADL. + */ +typedef struct GpadlIter { + VMBusGpadl *gpadl; + AddressSpace *as; + DMADirection dir; + /* offset into GPADL where the next i/o will be performed */ + uint32_t off; + /* + * Cached mapping of the currently accessed page, up to page boundary. + * Updated lazily on i/o. + * Note: MemoryRegionCache can not be used here because pages in the GPADL + * are non-contiguous and may belong to different memory regions. + */ + void *map; + /* offset after last i/o (i.e. not affected by seek) */ + uint32_t last_off; + /* + * Indicator that the iterator is active and may have a cached mapping. + * Allows to enforce bracketing of all i/o (which may create cached + * mappings) and thus exclude mapping leaks. + */ + bool active; +} GpadlIter; + +/* + * Ring buffer. There are two of them, sitting in the same GPADL, for each + * channel. + * Each ring buffer consists of a set of pages, with the first page containing + * the ring buffer header, and the remaining pages being for data packets. + */ +typedef struct VMBusRingBufCommon { + AddressSpace *as; + /* GPA of the ring buffer header */ + dma_addr_t rb_addr; + /* start and length of the ring buffer data area within GPADL */ + uint32_t base; + uint32_t len; + + GpadlIter iter; +} VMBusRingBufCommon; + +typedef struct VMBusSendRingBuf { + VMBusRingBufCommon common; + /* current write index, to be committed at the end of send */ + uint32_t wr_idx; + /* write index at the start of send */ + uint32_t last_wr_idx; + /* space to be requested from the guest */ + uint32_t wanted; + /* space reserved for planned sends */ + uint32_t reserved; + /* last seen read index */ + uint32_t last_seen_rd_idx; +} VMBusSendRingBuf; + +typedef struct VMBusRecvRingBuf { + VMBusRingBufCommon common; + /* current read index, to be committed at the end of receive */ + uint32_t rd_idx; + /* read index at the start of receive */ + uint32_t last_rd_idx; + /* last seen write index */ + uint32_t last_seen_wr_idx; +} VMBusRecvRingBuf; + + +enum { + VMOFFER_INIT, + VMOFFER_SENDING, + VMOFFER_SENT, +}; + +enum { + VMCHAN_INIT, + VMCHAN_OPENING, + VMCHAN_OPEN, +}; + +struct VMBusChannel { + VMBusDevice *dev; + + /* channel id */ + uint32_t id; + /* + * subchannel index within the device; subchannel #0 is "primary" and + * always exists + */ + uint16_t subchan_idx; + uint32_t open_id; + /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */ + uint32_t target_vp; + /* GPADL id to use for the ring buffers */ + uint32_t ringbuf_gpadl; + /* start (in pages) of the send ring buffer within @ringbuf_gpadl */ + uint32_t ringbuf_send_offset; + + uint8_t offer_state; + uint8_t state; + bool is_open; + + /* main device worker; copied from the device class */ + VMBusChannelNotifyCb notify_cb; + /* + * guest->host notifications, either sent directly or dispatched via + * interrupt page (older VMBus) + */ + EventNotifier notifier; + + VMBus *vmbus; + /* + * SINT route to signal with host->guest notifications; may be shared with + * the main VMBus SINT route + */ + HvSintRoute *notify_route; + VMBusGpadl *gpadl; + + VMBusSendRingBuf send_ringbuf; + VMBusRecvRingBuf recv_ringbuf; + + QTAILQ_ENTRY(VMBusChannel) link; +}; + +/* + * Hyper-V spec mandates that every message port has 16 buffers, which means + * that the guest can post up to this many messages without blocking. + * Therefore a queue for incoming messages has to be provided. + * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just + * doesn't transition to a new state until the message is known to have been + * successfully delivered to the respective SynIC message slot. + */ +#define HV_MSG_QUEUE_LEN 16 + +/* Hyper-V devices never use channel #0. Must be something special. */ +#define VMBUS_FIRST_CHANID 1 +/* Each channel occupies one bit within a single event page sint slot. */ +#define VMBUS_CHANID_COUNT (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID) +/* Leave a few connection numbers for other purposes. */ +#define VMBUS_CHAN_CONNECTION_OFFSET 16 + +/* + * Since the success or failure of sending a message is reported + * asynchronously, the VMBus state machine has effectively two entry points: + * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest + * message delivery status becomes known). Both are run as oneshot BHs on the + * main aio context, ensuring serialization. + */ +enum { + VMBUS_LISTEN, + VMBUS_HANDSHAKE, + VMBUS_OFFER, + VMBUS_CREATE_GPADL, + VMBUS_TEARDOWN_GPADL, + VMBUS_OPEN_CHANNEL, + VMBUS_UNLOAD, + VMBUS_STATE_MAX +}; + +struct VMBus { + BusState parent; + + uint8_t state; + /* protection against recursive aio_poll (see vmbus_run) */ + bool in_progress; + /* whether there's a message being delivered to the guest */ + bool msg_in_progress; + uint32_t version; + /* VP_INDEX of the vCPU to send messages and interrupts to */ + uint32_t target_vp; + HvSintRoute *sint_route; + /* + * interrupt page for older protocol versions; newer ones use SynIC event + * flags directly + */ + hwaddr int_page_gpa; + + DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT); + + /* incoming message queue */ + struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN]; + uint8_t rx_queue_head; + uint8_t rx_queue_size; + QemuMutex rx_queue_lock; + + QTAILQ_HEAD(, VMBusGpadl) gpadl_list; + QTAILQ_HEAD(, VMBusChannel) channel_list; + + /* + * guest->host notifications for older VMBus, to be dispatched via + * interrupt page + */ + EventNotifier notifier; +}; + +static bool gpadl_full(VMBusGpadl *gpadl) +{ + return gpadl->seen_gfns == gpadl->num_gfns; +} + +static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id, + uint32_t child_relid, uint32_t num_gfns) +{ + VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1); + + gpadl->id = id; + gpadl->child_relid = child_relid; + gpadl->num_gfns = num_gfns; + gpadl->gfns = g_new(uint64_t, num_gfns); + QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link); + gpadl->vmbus = vmbus; + gpadl->refcount = 1; + return gpadl; +} + +static void free_gpadl(VMBusGpadl *gpadl) +{ + QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link); + g_free(gpadl->gfns); + g_free(gpadl); +} + +static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id) +{ + VMBusGpadl *gpadl; + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + if (gpadl->id == gpadl_id) { + return gpadl; + } + } + return NULL; +} + +VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id) +{ + VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id); + if (!gpadl || !gpadl_full(gpadl)) { + return NULL; + } + gpadl->refcount++; + return gpadl; +} + +void vmbus_put_gpadl(VMBusGpadl *gpadl) +{ + if (!gpadl) { + return; + } + if (--gpadl->refcount) { + return; + } + free_gpadl(gpadl); +} + +uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl) +{ + return gpadl->num_gfns * TARGET_PAGE_SIZE; +} + +static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl, + AddressSpace *as, DMADirection dir) +{ + iter->gpadl = gpadl; + iter->as = as; + iter->dir = dir; + iter->active = false; +} + +static inline void gpadl_iter_cache_unmap(GpadlIter *iter) +{ + uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK; + uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1; + + /* mapping is only done to do non-zero amount of i/o */ + assert(iter->last_off > 0); + assert(map_start_in_page < io_end_in_page); + + dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page, + iter->dir, io_end_in_page - map_start_in_page); +} + +/* + * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf. + * The direction of the copy is determined by @iter->dir. + * The caller must ensure the operation overflows neither @buf nor the GPADL + * (there's an assert for the latter). + * Reuse the currently mapped page in the GPADL if possible. + */ +static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len) +{ + ssize_t ret = len; + + assert(iter->active); + + while (len) { + uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK; + uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page; + uint32_t cplen = MIN(pgleft, len); + void *p; + + /* try to reuse the cached mapping */ + if (iter->map) { + uint32_t map_start_in_page = + (uintptr_t)iter->map & ~TARGET_PAGE_MASK; + uint32_t off_base = iter->off & ~TARGET_PAGE_MASK; + uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK; + if (off_base != mapped_base || off_in_page < map_start_in_page) { + gpadl_iter_cache_unmap(iter); + iter->map = NULL; + } + } + + if (!iter->map) { + dma_addr_t maddr; + dma_addr_t mlen = pgleft; + uint32_t idx = iter->off >> TARGET_PAGE_BITS; + assert(idx < iter->gpadl->num_gfns); + + maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page; + + iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir); + if (mlen != pgleft) { + dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0); + iter->map = NULL; + return -EFAULT; + } + } + + p = (void *)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | off_in_page); + if (iter->dir == DMA_DIRECTION_FROM_DEVICE) { + memcpy(p, buf, cplen); + } else { + memcpy(buf, p, cplen); + } + + buf += cplen; + len -= cplen; + iter->off += cplen; + iter->last_off = iter->off; + } + + return ret; +} + +/* + * Position the iterator @iter at new offset @new_off. + * If this results in the cached mapping being unusable with the new offset, + * unmap it. + */ +static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off) +{ + assert(iter->active); + iter->off = new_off; +} + +/* + * Start a series of i/o on the GPADL. + * After this i/o and seek operations on @iter become legal. + */ +static inline void gpadl_iter_start_io(GpadlIter *iter) +{ + assert(!iter->active); + /* mapping is cached lazily on i/o */ + iter->map = NULL; + iter->active = true; +} + +/* + * End the eariler started series of i/o on the GPADL and release the cached + * mapping if any. + */ +static inline void gpadl_iter_end_io(GpadlIter *iter) +{ + assert(iter->active); + + if (iter->map) { + gpadl_iter_cache_unmap(iter); + } + + iter->active = false; +} + +static void vmbus_resched(VMBus *vmbus); +static void vmbus_msg_cb(void *data, int status); + +ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, + const struct iovec *iov, size_t iov_cnt) +{ + GpadlIter iter; + size_t i; + ssize_t ret = 0; + + gpadl_iter_init(&iter, gpadl, chan->dev->dma_as, + DMA_DIRECTION_FROM_DEVICE); + gpadl_iter_start_io(&iter); + gpadl_iter_seek(&iter, off); + for (i = 0; i < iov_cnt; i++) { + ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len); + if (ret < 0) { + goto out; + } + } +out: + gpadl_iter_end_io(&iter); + return ret; +} + +int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t len, size_t off) +{ + int ret_cnt = 0, ret; + unsigned i; + QEMUSGList *sgl = &req->sgl; + ScatterGatherEntry *sg = sgl->sg; + + for (i = 0; i < sgl->nsg; i++) { + if (sg[i].len > off) { + break; + } + off -= sg[i].len; + } + for (; len && i < sgl->nsg; i++) { + dma_addr_t mlen = MIN(sg[i].len - off, len); + dma_addr_t addr = sg[i].base + off; + len -= mlen; + off = 0; + + for (; mlen; ret_cnt++) { + dma_addr_t l = mlen; + dma_addr_t a = addr; + + if (ret_cnt == iov_cnt) { + ret = -ENOBUFS; + goto err; + } + + iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir); + if (!l) { + ret = -EFAULT; + goto err; + } + iov[ret_cnt].iov_len = l; + addr += l; + mlen -= l; + } + } + + return ret_cnt; +err: + vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0); + return ret; +} + +void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t accessed) +{ + QEMUSGList *sgl = &req->sgl; + unsigned i; + + for (i = 0; i < iov_cnt; i++) { + size_t acsd = MIN(accessed, iov[i].iov_len); + dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd); + accessed -= acsd; + } +} + +static const VMStateDescription vmstate_gpadl = { + .name = "vmbus/gpadl", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, VMBusGpadl), + VMSTATE_UINT32(child_relid, VMBusGpadl), + VMSTATE_UINT32(num_gfns, VMBusGpadl), + VMSTATE_UINT32(seen_gfns, VMBusGpadl), + VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0, + vmstate_info_uint64, uint64_t), + VMSTATE_UINT8(state, VMBusGpadl), + VMSTATE_END_OF_LIST() + } +}; + +/* + * Wrap the index into a ring buffer of @len bytes. + * @idx is assumed not to exceed twice the size of the ringbuffer, so only + * single wraparound is considered. + */ +static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len) +{ + if (idx >= len) { + idx -= len; + } + return idx; +} + +/* + * Circular difference between two indices into a ring buffer of @len bytes. + * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch + * up write index but not vice versa. + */ +static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len, + bool allow_catchup) +{ + return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len); +} + +static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf) +{ + vmbus_ring_buffer *rb; + dma_addr_t mlen = sizeof(*rb); + + rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen, + DMA_DIRECTION_FROM_DEVICE); + if (mlen != sizeof(*rb)) { + dma_memory_unmap(ringbuf->as, rb, mlen, + DMA_DIRECTION_FROM_DEVICE, 0); + return NULL; + } + return rb; +} + +static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf, + vmbus_ring_buffer *rb, bool dirty) +{ + assert(rb); + + dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE, + dirty ? sizeof(*rb) : 0); +} + +static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl, + AddressSpace *as, DMADirection dir, + uint32_t begin, uint32_t end) +{ + ringbuf->as = as; + ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS; + ringbuf->base = (begin + 1) << TARGET_PAGE_BITS; + ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS; + gpadl_iter_init(&ringbuf->iter, gpadl, as, dir); +} + +static int ringbufs_init(VMBusChannel *chan) +{ + vmbus_ring_buffer *rb; + VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf; + VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf; + + if (chan->ringbuf_send_offset <= 1 || + chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) { + return -EINVAL; + } + + ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as, + DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset); + ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as, + DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset, + chan->gpadl->num_gfns); + send_ringbuf->wanted = 0; + send_ringbuf->reserved = 0; + + rb = ringbuf_map_hdr(&recv_ringbuf->common); + if (!rb) { + return -EFAULT; + } + recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index; + ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false); + + rb = ringbuf_map_hdr(&send_ringbuf->common); + if (!rb) { + return -EFAULT; + } + send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index; + send_ringbuf->last_seen_rd_idx = rb->read_index; + rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ; + ringbuf_unmap_hdr(&send_ringbuf->common, rb, true); + + if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len || + send_ringbuf->wr_idx >= send_ringbuf->common.len) { + return -EOVERFLOW; + } + + return 0; +} + +/* + * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping + * around if needed. + * @len is assumed not to exceed the size of the ringbuffer, so only single + * wraparound is considered. + */ +static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len) +{ + ssize_t ret1 = 0, ret2 = 0; + uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off; + + if (len >= remain) { + ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain); + if (ret1 < 0) { + return ret1; + } + gpadl_iter_seek(&ringbuf->iter, ringbuf->base); + buf += remain; + len -= remain; + } + ret2 = gpadl_iter_io(&ringbuf->iter, buf, len); + if (ret2 < 0) { + return ret2; + } + return ret1 + ret2; +} + +/* + * Position the circular iterator within @ringbuf to offset @new_off, wrapping + * around if needed. + * @new_off is assumed not to exceed twice the size of the ringbuffer, so only + * single wraparound is considered. + */ +static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off) +{ + gpadl_iter_seek(&ringbuf->iter, + ringbuf->base + rb_idx_wrap(new_off, ringbuf->len)); +} + +static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf) +{ + return ringbuf->iter.off - ringbuf->base; +} + +static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf) +{ + gpadl_iter_start_io(&ringbuf->iter); +} + +static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf) +{ + gpadl_iter_end_io(&ringbuf->iter); +} + +VMBusDevice *vmbus_channel_device(VMBusChannel *chan) +{ + return chan->dev; +} + +VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx) +{ + if (chan_idx >= dev->num_channels) { + return NULL; + } + return &dev->channels[chan_idx]; +} + +uint32_t vmbus_channel_idx(VMBusChannel *chan) +{ + return chan - chan->dev->channels; +} + +void vmbus_channel_notify_host(VMBusChannel *chan) +{ + event_notifier_set(&chan->notifier); +} + +bool vmbus_channel_is_open(VMBusChannel *chan) +{ + return chan->is_open; +} + +/* + * Notify the guest side about the data to work on in the channel ring buffer. + * The notification is done by signaling a dedicated per-channel SynIC event + * flag (more recent guests) or setting a bit in the interrupt page and firing + * the VMBus SINT (older guests). + */ +static int vmbus_channel_notify_guest(VMBusChannel *chan) +{ + int res = 0; + unsigned long *int_map, mask; + unsigned idx; + hwaddr addr = chan->vmbus->int_page_gpa; + hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0; + + trace_vmbus_channel_notify_guest(chan->id); + + if (!addr) { + return hyperv_set_event_flag(chan->notify_route, chan->id); + } + + int_map = cpu_physical_memory_map(addr, &len, 1); + if (len != TARGET_PAGE_SIZE / 2) { + res = -ENXIO; + goto unmap; + } + + idx = BIT_WORD(chan->id); + mask = BIT_MASK(chan->id); + if ((atomic_fetch_or(&int_map[idx], mask) & mask) != mask) { + res = hyperv_sint_route_set_sint(chan->notify_route); + dirty = len; + } + +unmap: + cpu_physical_memory_unmap(int_map, len, 1, dirty); + return res; +} + +#define VMBUS_PKT_TRAILER sizeof(uint64_t) + +static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr, + uint32_t desclen, uint32_t msglen) +{ + hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) + + DIV_ROUND_UP(desclen, sizeof(uint64_t)); + hdr->len_qwords = hdr->offset_qwords + + DIV_ROUND_UP(msglen, sizeof(uint64_t)); + return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER; +} + +/* + * Simplified ring buffer operation with paired barriers annotations in the + * producer and consumer loops: + * + * producer * consumer + * ~~~~~~~~ * ~~~~~~~~ + * write pending_send_sz * read write_index + * smp_mb [A] * smp_mb [C] + * read read_index * read packet + * smp_mb [B] * read/write out-of-band data + * read/write out-of-band data * smp_mb [B] + * write packet * write read_index + * smp_mb [C] * smp_mb [A] + * write write_index * read pending_send_sz + * smp_wmb [D] * smp_rmb [D] + * write pending_send_sz * read write_index + * ... * ... + */ + +static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf) +{ + /* don't trust guest data */ + if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) { + return 0; + } + return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx, + ringbuf->common.len, false); +} + +static ssize_t ringbuf_send_update_idx(VMBusChannel *chan) +{ + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; + vmbus_ring_buffer *rb; + uint32_t written; + + written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx, + ringbuf->common.len, true); + if (!written) { + return 0; + } + + rb = ringbuf_map_hdr(&ringbuf->common); + if (!rb) { + return -EFAULT; + } + + ringbuf->reserved -= written; + + /* prevent reorder with the data operation and packet write */ + smp_mb(); /* barrier pair [C] */ + rb->write_index = ringbuf->wr_idx; + + /* + * If the producer earlier indicated that it wants to be notified when the + * consumer frees certain amount of space in the ring buffer, that amount + * is reduced by the size of the completed write. + */ + if (ringbuf->wanted) { + /* otherwise reservation would fail */ + assert(ringbuf->wanted < written); + ringbuf->wanted -= written; + /* prevent reorder with write_index write */ + smp_wmb(); /* barrier pair [D] */ + rb->pending_send_sz = ringbuf->wanted; + } + + /* prevent reorder with write_index or pending_send_sz write */ + smp_mb(); /* barrier pair [A] */ + ringbuf->last_seen_rd_idx = rb->read_index; + + /* + * The consumer may have missed the reduction of pending_send_sz and skip + * notification, so re-check the blocking condition, and, if it's no longer + * true, ensure processing another iteration by simulating consumer's + * notification. + */ + if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) { + vmbus_channel_notify_host(chan); + } + + /* skip notification by consumer's request */ + if (rb->interrupt_mask) { + goto out; + } + + /* + * The consumer hasn't caught up with the producer's previous state so it's + * not blocked. + * (last_seen_rd_idx comes from the guest but it's safe to use w/o + * validation here as it only affects notification.) + */ + if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx, + ringbuf->common.len, true) > written) { + goto out; + } + + vmbus_channel_notify_guest(chan); +out: + ringbuf_unmap_hdr(&ringbuf->common, rb, true); + ringbuf->last_wr_idx = ringbuf->wr_idx; + return written; +} + +int vmbus_channel_reserve(VMBusChannel *chan, + uint32_t desclen, uint32_t msglen) +{ + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; + vmbus_ring_buffer *rb = NULL; + vmbus_packet_hdr hdr; + uint32_t needed = ringbuf->reserved + + vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); + + /* avoid touching the guest memory if possible */ + if (likely(needed <= ringbuf_send_avail(ringbuf))) { + goto success; + } + + rb = ringbuf_map_hdr(&ringbuf->common); + if (!rb) { + return -EFAULT; + } + + /* fetch read index from guest memory and try again */ + ringbuf->last_seen_rd_idx = rb->read_index; + + if (likely(needed <= ringbuf_send_avail(ringbuf))) { + goto success; + } + + rb->pending_send_sz = needed; + + /* + * The consumer may have made progress and freed up some space before + * seeing updated pending_send_sz, so re-read read_index (preventing + * reorder with the pending_send_sz write) and try again. + */ + smp_mb(); /* barrier pair [A] */ + ringbuf->last_seen_rd_idx = rb->read_index; + + if (needed > ringbuf_send_avail(ringbuf)) { + goto out; + } + +success: + ringbuf->reserved = needed; + needed = 0; + + /* clear pending_send_sz if it was set */ + if (ringbuf->wanted) { + if (!rb) { + rb = ringbuf_map_hdr(&ringbuf->common); + if (!rb) { + /* failure to clear pending_send_sz is non-fatal */ + goto out; + } + } + + rb->pending_send_sz = 0; + } + + /* prevent reorder of the following data operation with read_index read */ + smp_mb(); /* barrier pair [B] */ + +out: + if (rb) { + ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed); + } + ringbuf->wanted = needed; + return needed ? -ENOSPC : 0; +} + +ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, + void *desc, uint32_t desclen, + void *msg, uint32_t msglen, + bool need_comp, uint64_t transaction_id) +{ + ssize_t ret = 0; + vmbus_packet_hdr hdr; + uint32_t totlen; + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; + + if (!vmbus_channel_is_open(chan)) { + return -EINVAL; + } + + totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); + hdr.type = pkt_type; + hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0; + hdr.transaction_id = transaction_id; + + assert(totlen <= ringbuf->reserved); + + ringbuf_start_io(&ringbuf->common); + ringbuf_seek(&ringbuf->common, ringbuf->wr_idx); + ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)); + if (ret < 0) { + goto out; + } + if (desclen) { + assert(desc); + ret = ringbuf_io(&ringbuf->common, desc, desclen); + if (ret < 0) { + goto out; + } + ringbuf_seek(&ringbuf->common, + ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t)); + } + ret = ringbuf_io(&ringbuf->common, msg, msglen); + if (ret < 0) { + goto out; + } + ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen); + ringbuf->wr_idx = ringbuf_tell(&ringbuf->common); + ret = 0; +out: + ringbuf_end_io(&ringbuf->common); + if (ret) { + return ret; + } + return ringbuf_send_update_idx(chan); +} + +ssize_t vmbus_channel_send_completion(VMBusChanReq *req, + void *msg, uint32_t msglen) +{ + assert(req->need_comp); + return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0, + msg, msglen, false, req->transaction_id); +} + +static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev, + VMBusRingBufCommon *ringbuf, uint32_t len) +{ + int ret; + vmbus_pkt_gpa_direct hdr; + hwaddr curaddr = 0; + hwaddr curlen = 0; + int num; + + if (len < sizeof(hdr)) { + return -EIO; + } + ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr)); + if (ret < 0) { + return ret; + } + len -= sizeof(hdr); + + num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t); + if (num < 0) { + return -EIO; + } + qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as); + + for (; hdr.rangecount; hdr.rangecount--) { + vmbus_gpa_range range; + + if (len < sizeof(range)) { + goto eio; + } + ret = ringbuf_io(ringbuf, &range, sizeof(range)); + if (ret < 0) { + goto err; + } + len -= sizeof(range); + + if (range.byte_offset & TARGET_PAGE_MASK) { + goto eio; + } + + for (; range.byte_count; range.byte_offset = 0) { + uint64_t paddr; + uint32_t plen = MIN(range.byte_count, + TARGET_PAGE_SIZE - range.byte_offset); + + if (len < sizeof(uint64_t)) { + goto eio; + } + ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr)); + if (ret < 0) { + goto err; + } + len -= sizeof(uint64_t); + paddr <<= TARGET_PAGE_BITS; + paddr |= range.byte_offset; + range.byte_count -= plen; + + if (curaddr + curlen == paddr) { + /* consecutive fragments - join */ + curlen += plen; + } else { + if (curlen) { + qemu_sglist_add(sgl, curaddr, curlen); + } + + curaddr = paddr; + curlen = plen; + } + } + } + + if (curlen) { + qemu_sglist_add(sgl, curaddr, curlen); + } + + return 0; +eio: + ret = -EIO; +err: + qemu_sglist_destroy(sgl); + return ret; +} + +static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan, + uint32_t size, uint16_t pkt_type, + uint32_t msglen, uint64_t transaction_id, + bool need_comp) +{ + VMBusChanReq *req; + uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg)); + uint32_t totlen = msgoff + msglen; + + req = g_malloc0(totlen); + req->chan = chan; + req->pkt_type = pkt_type; + req->msg = (void *)req + msgoff; + req->msglen = msglen; + req->transaction_id = transaction_id; + req->need_comp = need_comp; + return req; +} + +int vmbus_channel_recv_start(VMBusChannel *chan) +{ + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; + vmbus_ring_buffer *rb; + + rb = ringbuf_map_hdr(&ringbuf->common); + if (!rb) { + return -EFAULT; + } + ringbuf->last_seen_wr_idx = rb->write_index; + ringbuf_unmap_hdr(&ringbuf->common, rb, false); + + if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) { + return -EOVERFLOW; + } + + /* prevent reorder of the following data operation with write_index read */ + smp_mb(); /* barrier pair [C] */ + return 0; +} + +void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size) +{ + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; + vmbus_packet_hdr hdr = {}; + VMBusChanReq *req; + uint32_t avail; + uint32_t totlen, pktlen, msglen, msgoff, desclen; + + assert(size >= sizeof(*req)); + + /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */ + avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx, + ringbuf->common.len, true); + if (avail < sizeof(hdr)) { + return NULL; + } + + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx); + if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) { + return NULL; + } + + pktlen = hdr.len_qwords * sizeof(uint64_t); + totlen = pktlen + VMBUS_PKT_TRAILER; + if (totlen > avail) { + return NULL; + } + + msgoff = hdr.offset_qwords * sizeof(uint64_t); + if (msgoff > pktlen || msgoff < sizeof(hdr)) { + error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen); + return NULL; + } + + msglen = pktlen - msgoff; + + req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id, + hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION); + + switch (hdr.type) { + case VMBUS_PACKET_DATA_USING_GPA_DIRECT: + desclen = msgoff - sizeof(hdr); + if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common, + desclen) < 0) { + error_report("%s: failed to convert GPA ranges to SGL", __func__); + goto free_req; + } + break; + case VMBUS_PACKET_DATA_INBAND: + case VMBUS_PACKET_COMP: + break; + default: + error_report("%s: unexpected msg type: %x", __func__, hdr.type); + goto free_req; + } + + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff); + if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) { + goto free_req; + } + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen); + + return req; +free_req: + vmbus_free_req(req); + return NULL; +} + +void vmbus_channel_recv_pop(VMBusChannel *chan) +{ + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; + ringbuf->rd_idx = ringbuf_tell(&ringbuf->common); +} + +ssize_t vmbus_channel_recv_done(VMBusChannel *chan) +{ + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; + vmbus_ring_buffer *rb; + uint32_t read; + + read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx, + ringbuf->common.len, true); + if (!read) { + return 0; + } + + rb = ringbuf_map_hdr(&ringbuf->common); + if (!rb) { + return -EFAULT; + } + + /* prevent reorder with the data operation and packet read */ + smp_mb(); /* barrier pair [B] */ + rb->read_index = ringbuf->rd_idx; + + /* prevent reorder of the following pending_send_sz read */ + smp_mb(); /* barrier pair [A] */ + + if (rb->interrupt_mask) { + goto out; + } + + if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) { + uint32_t wr_idx, wr_avail; + uint32_t wanted = rb->pending_send_sz; + + if (!wanted) { + goto out; + } + + /* prevent reorder with pending_send_sz read */ + smp_rmb(); /* barrier pair [D] */ + wr_idx = rb->write_index; + + wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len, + true); + + /* the producer wasn't blocked on the consumer state */ + if (wr_avail >= read + wanted) { + goto out; + } + /* there's not enough space for the producer to make progress */ + if (wr_avail < wanted) { + goto out; + } + } + + vmbus_channel_notify_guest(chan); +out: + ringbuf_unmap_hdr(&ringbuf->common, rb, true); + ringbuf->last_rd_idx = ringbuf->rd_idx; + return read; +} + +void vmbus_free_req(void *req) +{ + VMBusChanReq *r = req; + + if (!req) { + return; + } + + if (r->sgl.dev) { + qemu_sglist_destroy(&r->sgl); + } + g_free(req); +} + +static void channel_event_cb(EventNotifier *e) +{ + VMBusChannel *chan = container_of(e, VMBusChannel, notifier); + if (event_notifier_test_and_clear(e)) { + /* + * All receives are supposed to happen within the device worker, so + * bracket it with ringbuf_start/end_io on the receive ringbuffer, and + * potentially reuse the cached mapping throughout the worker. + * Can't do this for sends as they may happen outside the device + * worker. + */ + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; + ringbuf_start_io(&ringbuf->common); + chan->notify_cb(chan); + ringbuf_end_io(&ringbuf->common); + + } +} + +static int alloc_chan_id(VMBus *vmbus) +{ + int ret; + + ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0); + if (ret == VMBUS_CHANID_COUNT) { + return -ENOMEM; + } + return ret + VMBUS_FIRST_CHANID; +} + +static int register_chan_id(VMBusChannel *chan) +{ + return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID, + chan->vmbus->chanid_bitmap) ? -EEXIST : 0; +} + +static void unregister_chan_id(VMBusChannel *chan) +{ + clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap); +} + +static uint32_t chan_connection_id(VMBusChannel *chan) +{ + return VMBUS_CHAN_CONNECTION_OFFSET + chan->id; +} + +static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc, + VMBusChannel *chan, uint16_t idx, Error **errp) +{ + int res; + + chan->dev = dev; + chan->notify_cb = vdc->chan_notify_cb; + chan->subchan_idx = idx; + chan->vmbus = vmbus; + + res = alloc_chan_id(vmbus); + if (res < 0) { + error_setg(errp, "no spare channel id"); + return; + } + chan->id = res; + register_chan_id(chan); + + /* + * The guest drivers depend on the device subchannels (idx #1+) to be + * offered after the primary channel (idx #0) of that device. To ensure + * that, record the channels on the channel list in the order they appear + * within the device. + */ + QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link); +} + +static void deinit_channel(VMBusChannel *chan) +{ + assert(chan->state == VMCHAN_INIT); + QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link); + unregister_chan_id(chan); +} + +static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp) +{ + uint16_t i; + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev); + Error *err = NULL; + + dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1; + if (dev->num_channels < 1) { + error_setg(&err, "invalid #channels: %u", dev->num_channels); + goto error_out; + } + + dev->channels = g_new0(VMBusChannel, dev->num_channels); + for (i = 0; i < dev->num_channels; i++) { + init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err); + if (err) { + goto err_init; + } + } + + return; + +err_init: + while (i--) { + deinit_channel(&dev->channels[i]); + } +error_out: + error_propagate(errp, err); +} + +static void free_channels(VMBusDevice *dev) +{ + uint16_t i; + for (i = 0; i < dev->num_channels; i++) { + deinit_channel(&dev->channels[i]); + } + g_free(dev->channels); +} + +static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index) +{ + VMBusChannel *chan; + + if (vp_index == vmbus->target_vp) { + hyperv_sint_route_ref(vmbus->sint_route); + return vmbus->sint_route; + } + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) { + hyperv_sint_route_ref(chan->notify_route); + return chan->notify_route; + } + } + + return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL); +} + +static void open_channel(VMBusChannel *chan) +{ + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); + + chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl); + if (!chan->gpadl) { + return; + } + + if (ringbufs_init(chan)) { + goto put_gpadl; + } + + if (event_notifier_init(&chan->notifier, 0)) { + goto put_gpadl; + } + + event_notifier_set_handler(&chan->notifier, channel_event_cb); + + if (hyperv_set_event_flag_handler(chan_connection_id(chan), + &chan->notifier)) { + goto cleanup_notifier; + } + + chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp); + if (!chan->notify_route) { + goto clear_event_flag_handler; + } + + if (vdc->open_channel && vdc->open_channel(chan)) { + goto unref_sint_route; + } + + chan->is_open = true; + return; + +unref_sint_route: + hyperv_sint_route_unref(chan->notify_route); +clear_event_flag_handler: + hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); +cleanup_notifier: + event_notifier_set_handler(&chan->notifier, NULL); + event_notifier_cleanup(&chan->notifier); +put_gpadl: + vmbus_put_gpadl(chan->gpadl); +} + +static void close_channel(VMBusChannel *chan) +{ + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); + + if (!chan->is_open) { + return; + } + + if (vdc->close_channel) { + vdc->close_channel(chan); + } + + hyperv_sint_route_unref(chan->notify_route); + hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); + event_notifier_set_handler(&chan->notifier, NULL); + event_notifier_cleanup(&chan->notifier); + vmbus_put_gpadl(chan->gpadl); + chan->is_open = false; +} + +static int channel_post_load(void *opaque, int version_id) +{ + VMBusChannel *chan = opaque; + + return register_chan_id(chan); +} + +static const VMStateDescription vmstate_channel = { + .name = "vmbus/channel", + .version_id = 0, + .minimum_version_id = 0, + .post_load = channel_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, VMBusChannel), + VMSTATE_UINT16(subchan_idx, VMBusChannel), + VMSTATE_UINT32(open_id, VMBusChannel), + VMSTATE_UINT32(target_vp, VMBusChannel), + VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel), + VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel), + VMSTATE_UINT8(offer_state, VMBusChannel), + VMSTATE_UINT8(state, VMBusChannel), + VMSTATE_END_OF_LIST() + } +}; + +static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id) +{ + VMBusChannel *chan; + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->id == id) { + return chan; + } + } + return NULL; +} + +static int enqueue_incoming_message(VMBus *vmbus, + const struct hyperv_post_message_input *msg) +{ + int ret = 0; + uint8_t idx, prev_size; + + qemu_mutex_lock(&vmbus->rx_queue_lock); + + if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) { + ret = -ENOBUFS; + goto out; + } + + prev_size = vmbus->rx_queue_size; + idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN; + memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg)); + vmbus->rx_queue_size++; + + /* only need to resched if the queue was empty before */ + if (!prev_size) { + vmbus_resched(vmbus); + } +out: + qemu_mutex_unlock(&vmbus->rx_queue_lock); + return ret; +} + +static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg, + void *data) +{ + VMBus *vmbus = data; + struct vmbus_message_header *vmbus_msg; + + if (msg->message_type != HV_MESSAGE_VMBUS) { + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + if (msg->payload_size < sizeof(struct vmbus_message_header)) { + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + vmbus_msg = (struct vmbus_message_header *)msg->payload; + + trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size); + + if (vmbus_msg->message_type == VMBUS_MSG_INVALID || + vmbus_msg->message_type >= VMBUS_MSG_COUNT) { + error_report("vmbus: unknown message type %#x", + vmbus_msg->message_type); + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + if (enqueue_incoming_message(vmbus, msg)) { + return HV_STATUS_INSUFFICIENT_BUFFERS; + } + return HV_STATUS_SUCCESS; +} + +static bool vmbus_initialized(VMBus *vmbus) +{ + return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT; +} + +static void vmbus_reset_all(VMBus *vmbus) +{ + qbus_reset_all(BUS(vmbus)); +} + +static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen) +{ + int ret; + struct hyperv_message msg = { + .header.message_type = HV_MESSAGE_VMBUS, + }; + + assert(!vmbus->msg_in_progress); + assert(msglen <= sizeof(msg.payload)); + assert(msglen >= sizeof(struct vmbus_message_header)); + + vmbus->msg_in_progress = true; + + trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type, + msglen); + + memcpy(msg.payload, msgdata, msglen); + msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN); + + ret = hyperv_post_msg(vmbus->sint_route, &msg); + if (ret == 0 || ret == -EAGAIN) { + return; + } + + error_report("message delivery fatal failure: %d; aborting vmbus", ret); + vmbus_reset_all(vmbus); +} + +static int vmbus_init(VMBus *vmbus) +{ + if (vmbus->target_vp != (uint32_t)-1) { + vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT, + vmbus_msg_cb, vmbus); + if (!vmbus->sint_route) { + error_report("failed to set up SINT route"); + return -ENOMEM; + } + } + return 0; +} + +static void vmbus_deinit(VMBus *vmbus) +{ + VMBusGpadl *gpadl, *tmp_gpadl; + VMBusChannel *chan; + + QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) { + if (gpadl->state == VMGPADL_TORNDOWN) { + continue; + } + vmbus_put_gpadl(gpadl); + } + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + chan->offer_state = VMOFFER_INIT; + } + + hyperv_sint_route_unref(vmbus->sint_route); + vmbus->sint_route = NULL; + vmbus->int_page_gpa = 0; + vmbus->target_vp = (uint32_t)-1; + vmbus->version = 0; + vmbus->state = VMBUS_LISTEN; + vmbus->msg_in_progress = false; +} + +static void handle_initiate_contact(VMBus *vmbus, + vmbus_message_initiate_contact *msg, + uint32_t msglen) +{ + if (msglen < sizeof(*msg)) { + return; + } + + trace_vmbus_initiate_contact(msg->version_requested >> 16, + msg->version_requested & 0xffff, + msg->target_vcpu, msg->monitor_page1, + msg->monitor_page2, msg->interrupt_page); + + /* + * Reset vmbus on INITIATE_CONTACT regardless of its previous state. + * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down + * before handing over to OS loader. + */ + vmbus_reset_all(vmbus); + + vmbus->target_vp = msg->target_vcpu; + vmbus->version = msg->version_requested; + if (vmbus->version < VMBUS_VERSION_WIN8) { + /* linux passes interrupt page even when it doesn't need it */ + vmbus->int_page_gpa = msg->interrupt_page; + } + vmbus->state = VMBUS_HANDSHAKE; + + if (vmbus_init(vmbus)) { + error_report("failed to init vmbus; aborting"); + vmbus_deinit(vmbus); + return; + } +} + +static void send_handshake(VMBus *vmbus) +{ + struct vmbus_message_version_response msg = { + .header.message_type = VMBUS_MSG_VERSION_RESPONSE, + .version_supported = vmbus_initialized(vmbus), + }; + + post_msg(vmbus, &msg, sizeof(msg)); +} + +static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen) +{ + VMBusChannel *chan; + + if (!vmbus_initialized(vmbus)) { + return; + } + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->offer_state == VMOFFER_INIT) { + chan->offer_state = VMOFFER_SENDING; + break; + } + } + + vmbus->state = VMBUS_OFFER; +} + +static void send_offer(VMBus *vmbus) +{ + VMBusChannel *chan; + struct vmbus_message_header alloffers_msg = { + .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED, + }; + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->offer_state == VMOFFER_SENDING) { + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); + /* Hyper-V wants LE GUIDs */ + QemuUUID classid = qemu_uuid_bswap(vdc->classid); + QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid); + struct vmbus_message_offer_channel msg = { + .header.message_type = VMBUS_MSG_OFFERCHANNEL, + .child_relid = chan->id, + .connection_id = chan_connection_id(chan), + .channel_flags = vdc->channel_flags, + .mmio_size_mb = vdc->mmio_size_mb, + .sub_channel_index = vmbus_channel_idx(chan), + .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED, + }; + + memcpy(msg.type_uuid, &classid, sizeof(classid)); + memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid)); + + trace_vmbus_send_offer(chan->id, chan->dev); + + post_msg(vmbus, &msg, sizeof(msg)); + return; + } + } + + /* no more offers, send terminator message */ + trace_vmbus_terminate_offers(); + post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg)); +} + +static bool complete_offer(VMBus *vmbus) +{ + VMBusChannel *chan; + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->offer_state == VMOFFER_SENDING) { + chan->offer_state = VMOFFER_SENT; + goto next_offer; + } + } + /* + * no transitioning channels found so this is completing the terminator + * message, and vmbus can move to the next state + */ + return true; + +next_offer: + /* try to mark another channel for offering */ + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->offer_state == VMOFFER_INIT) { + chan->offer_state = VMOFFER_SENDING; + break; + } + } + /* + * if an offer has been sent there are more offers or the terminator yet to + * send, so no state transition for vmbus + */ + return false; +} + + +static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg, + uint32_t msglen) +{ + VMBusGpadl *gpadl; + uint32_t num_gfns, i; + + /* must include at least one gpa range */ + if (msglen < sizeof(*msg) + sizeof(msg->range[0]) || + !vmbus_initialized(vmbus)) { + return; + } + + num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) / + sizeof(msg->range[0].pfn_array[0]); + + trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns); + + /* + * In theory the GPADL_HEADER message can define a GPADL with multiple GPA + * ranges each with arbitrary size and alignment. However in practice only + * single-range page-aligned GPADLs have been observed so just ignore + * anything else and simplify things greatly. + */ + if (msg->rangecount != 1 || msg->range[0].byte_offset || + (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) { + return; + } + + /* ignore requests to create already existing GPADLs */ + if (find_gpadl(vmbus, msg->gpadl_id)) { + return; + } + + gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns); + + for (i = 0; i < num_gfns && + (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen; + i++) { + gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i]; + } + + if (gpadl_full(gpadl)) { + vmbus->state = VMBUS_CREATE_GPADL; + } +} + +static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg, + uint32_t msglen) +{ + VMBusGpadl *gpadl; + uint32_t num_gfns_left, i; + + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { + return; + } + + trace_vmbus_gpadl_body(msg->gpadl_id); + + gpadl = find_gpadl(vmbus, msg->gpadl_id); + if (!gpadl) { + return; + } + + num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns; + assert(num_gfns_left); + + for (i = 0; i < num_gfns_left && + (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) { + gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i]; + } + + if (gpadl_full(gpadl)) { + vmbus->state = VMBUS_CREATE_GPADL; + } +} + +static void send_create_gpadl(VMBus *vmbus) +{ + VMBusGpadl *gpadl; + + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { + struct vmbus_message_gpadl_created msg = { + .header.message_type = VMBUS_MSG_GPADL_CREATED, + .gpadl_id = gpadl->id, + .child_relid = gpadl->child_relid, + }; + + trace_vmbus_gpadl_created(gpadl->id); + post_msg(vmbus, &msg, sizeof(msg)); + return; + } + } + + assert(false); +} + +static bool complete_create_gpadl(VMBus *vmbus) +{ + VMBusGpadl *gpadl; + + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { + gpadl->state = VMGPADL_ALIVE; + + return true; + } + } + + assert(false); + return false; +} + +static void handle_gpadl_teardown(VMBus *vmbus, + vmbus_message_gpadl_teardown *msg, + uint32_t msglen) +{ + VMBusGpadl *gpadl; + + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { + return; + } + + trace_vmbus_gpadl_teardown(msg->gpadl_id); + + gpadl = find_gpadl(vmbus, msg->gpadl_id); + if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) { + return; + } + + gpadl->state = VMGPADL_TEARINGDOWN; + vmbus->state = VMBUS_TEARDOWN_GPADL; +} + +static void send_teardown_gpadl(VMBus *vmbus) +{ + VMBusGpadl *gpadl; + + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + if (gpadl->state == VMGPADL_TEARINGDOWN) { + struct vmbus_message_gpadl_torndown msg = { + .header.message_type = VMBUS_MSG_GPADL_TORNDOWN, + .gpadl_id = gpadl->id, + }; + + trace_vmbus_gpadl_torndown(gpadl->id); + post_msg(vmbus, &msg, sizeof(msg)); + return; + } + } + + assert(false); +} + +static bool complete_teardown_gpadl(VMBus *vmbus) +{ + VMBusGpadl *gpadl; + + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + if (gpadl->state == VMGPADL_TEARINGDOWN) { + gpadl->state = VMGPADL_TORNDOWN; + vmbus_put_gpadl(gpadl); + return true; + } + } + + assert(false); + return false; +} + +static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg, + uint32_t msglen) +{ + VMBusChannel *chan; + + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { + return; + } + + trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id, + msg->target_vp); + chan = find_channel(vmbus, msg->child_relid); + if (!chan || chan->state != VMCHAN_INIT) { + return; + } + + chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id; + chan->ringbuf_send_offset = msg->ring_buffer_offset; + chan->target_vp = msg->target_vp; + chan->open_id = msg->open_id; + + open_channel(chan); + + chan->state = VMCHAN_OPENING; + vmbus->state = VMBUS_OPEN_CHANNEL; +} + +static void send_open_channel(VMBus *vmbus) +{ + VMBusChannel *chan; + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->state == VMCHAN_OPENING) { + struct vmbus_message_open_result msg = { + .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT, + .child_relid = chan->id, + .open_id = chan->open_id, + .status = !vmbus_channel_is_open(chan), + }; + + trace_vmbus_channel_open(chan->id, msg.status); + post_msg(vmbus, &msg, sizeof(msg)); + return; + } + } + + assert(false); +} + +static bool complete_open_channel(VMBus *vmbus) +{ + VMBusChannel *chan; + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (chan->state == VMCHAN_OPENING) { + if (vmbus_channel_is_open(chan)) { + chan->state = VMCHAN_OPEN; + /* + * simulate guest notification of ringbuffer space made + * available, for the channel protocols where the host + * initiates the communication + */ + vmbus_channel_notify_host(chan); + } else { + chan->state = VMCHAN_INIT; + } + return true; + } + } + + assert(false); + return false; +} + +static void vdev_reset_on_close(VMBusDevice *vdev) +{ + uint16_t i; + + for (i = 0; i < vdev->num_channels; i++) { + if (vmbus_channel_is_open(&vdev->channels[i])) { + return; + } + } + + /* all channels closed -- reset device */ + qdev_reset_all(DEVICE(vdev)); +} + +static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg, + uint32_t msglen) +{ + VMBusChannel *chan; + + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { + return; + } + + trace_vmbus_close_channel(msg->child_relid); + + chan = find_channel(vmbus, msg->child_relid); + if (!chan) { + return; + } + + close_channel(chan); + chan->state = VMCHAN_INIT; + + vdev_reset_on_close(chan->dev); +} + +static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen) +{ + vmbus->state = VMBUS_UNLOAD; +} + +static void send_unload(VMBus *vmbus) +{ + vmbus_message_header msg = { + .message_type = VMBUS_MSG_UNLOAD_RESPONSE, + }; + + qemu_mutex_lock(&vmbus->rx_queue_lock); + vmbus->rx_queue_size = 0; + qemu_mutex_unlock(&vmbus->rx_queue_lock); + + post_msg(vmbus, &msg, sizeof(msg)); + return; +} + +static bool complete_unload(VMBus *vmbus) +{ + vmbus_reset_all(vmbus); + return true; +} + +static void process_message(VMBus *vmbus) +{ + struct hyperv_post_message_input *hv_msg; + struct vmbus_message_header *msg; + void *msgdata; + uint32_t msglen; + + qemu_mutex_lock(&vmbus->rx_queue_lock); + + if (!vmbus->rx_queue_size) { + goto unlock; + } + + hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head]; + msglen = hv_msg->payload_size; + if (msglen < sizeof(*msg)) { + goto out; + } + msgdata = hv_msg->payload; + msg = (struct vmbus_message_header *)msgdata; + + trace_vmbus_process_incoming_message(msg->message_type); + + switch (msg->message_type) { + case VMBUS_MSG_INITIATE_CONTACT: + handle_initiate_contact(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_REQUESTOFFERS: + handle_request_offers(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_GPADL_HEADER: + handle_gpadl_header(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_GPADL_BODY: + handle_gpadl_body(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_GPADL_TEARDOWN: + handle_gpadl_teardown(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_OPENCHANNEL: + handle_open_channel(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_CLOSECHANNEL: + handle_close_channel(vmbus, msgdata, msglen); + break; + case VMBUS_MSG_UNLOAD: + handle_unload(vmbus, msgdata, msglen); + break; + default: + error_report("unknown message type %#x", msg->message_type); + break; + } + +out: + vmbus->rx_queue_size--; + vmbus->rx_queue_head++; + vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN; + + vmbus_resched(vmbus); +unlock: + qemu_mutex_unlock(&vmbus->rx_queue_lock); +} + +static const struct { + void (*run)(VMBus *vmbus); + bool (*complete)(VMBus *vmbus); +} state_runner[] = { + [VMBUS_LISTEN] = {process_message, NULL}, + [VMBUS_HANDSHAKE] = {send_handshake, NULL}, + [VMBUS_OFFER] = {send_offer, complete_offer}, + [VMBUS_CREATE_GPADL] = {send_create_gpadl, complete_create_gpadl}, + [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl}, + [VMBUS_OPEN_CHANNEL] = {send_open_channel, complete_open_channel}, + [VMBUS_UNLOAD] = {send_unload, complete_unload}, +}; + +static void vmbus_do_run(VMBus *vmbus) +{ + if (vmbus->msg_in_progress) { + return; + } + + assert(vmbus->state < VMBUS_STATE_MAX); + assert(state_runner[vmbus->state].run); + state_runner[vmbus->state].run(vmbus); +} + +static void vmbus_run(void *opaque) +{ + VMBus *vmbus = opaque; + + /* make sure no recursion happens (e.g. due to recursive aio_poll()) */ + if (vmbus->in_progress) { + return; + } + + vmbus->in_progress = true; + /* + * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it + * should go *after* the code that can result in aio_poll; otherwise + * reschedules can be missed. No idea how to enforce that. + */ + vmbus_do_run(vmbus); + vmbus->in_progress = false; +} + +static void vmbus_msg_cb(void *data, int status) +{ + VMBus *vmbus = data; + bool (*complete)(VMBus *vmbus); + + assert(vmbus->msg_in_progress); + + trace_vmbus_msg_cb(status); + + if (status == -EAGAIN) { + goto out; + } + if (status) { + error_report("message delivery fatal failure: %d; aborting vmbus", + status); + vmbus_reset_all(vmbus); + return; + } + + assert(vmbus->state < VMBUS_STATE_MAX); + complete = state_runner[vmbus->state].complete; + if (!complete || complete(vmbus)) { + vmbus->state = VMBUS_LISTEN; + } +out: + vmbus->msg_in_progress = false; + vmbus_resched(vmbus); +} + +static void vmbus_resched(VMBus *vmbus) +{ + aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus); +} + +static void vmbus_signal_event(EventNotifier *e) +{ + VMBusChannel *chan; + VMBus *vmbus = container_of(e, VMBus, notifier); + unsigned long *int_map; + hwaddr addr, len; + bool is_dirty = false; + + if (!event_notifier_test_and_clear(e)) { + return; + } + + trace_vmbus_signal_event(); + + if (!vmbus->int_page_gpa) { + return; + } + + addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2; + len = TARGET_PAGE_SIZE / 2; + int_map = cpu_physical_memory_map(addr, &len, 1); + if (len != TARGET_PAGE_SIZE / 2) { + goto unmap; + } + + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) { + if (!vmbus_channel_is_open(chan)) { + continue; + } + vmbus_channel_notify_host(chan); + is_dirty = true; + } + } + +unmap: + cpu_physical_memory_unmap(int_map, len, 1, is_dirty); +} + +static void vmbus_dev_realize(DeviceState *dev, Error **errp) +{ + VMBusDevice *vdev = VMBUS_DEVICE(dev); + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); + VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); + BusChild *child; + Error *err = NULL; + char idstr[UUID_FMT_LEN + 1]; + + assert(!qemu_uuid_is_null(&vdev->instanceid)); + + /* Check for instance id collision for this class id */ + QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { + VMBusDevice *child_dev = VMBUS_DEVICE(child->child); + + if (child_dev == vdev) { + continue; + } + + if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) { + qemu_uuid_unparse(&vdev->instanceid, idstr); + error_setg(&err, "duplicate vmbus device instance id %s", idstr); + goto error_out; + } + } + + vdev->dma_as = &address_space_memory; + + create_channels(vmbus, vdev, &err); + if (err) { + goto error_out; + } + + if (vdc->vmdev_realize) { + vdc->vmdev_realize(vdev, &err); + if (err) { + goto err_vdc_realize; + } + } + return; + +err_vdc_realize: + free_channels(vdev); +error_out: + error_propagate(errp, err); +} + +static void vmbus_dev_reset(DeviceState *dev) +{ + uint16_t i; + VMBusDevice *vdev = VMBUS_DEVICE(dev); + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); + + if (vdev->channels) { + for (i = 0; i < vdev->num_channels; i++) { + VMBusChannel *chan = &vdev->channels[i]; + close_channel(chan); + chan->state = VMCHAN_INIT; + } + } + + if (vdc->vmdev_reset) { + vdc->vmdev_reset(vdev); + } +} + +static void vmbus_dev_unrealize(DeviceState *dev) +{ + VMBusDevice *vdev = VMBUS_DEVICE(dev); + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); + + if (vdc->vmdev_unrealize) { + vdc->vmdev_unrealize(vdev); + } + free_channels(vdev); +} + +static void vmbus_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *kdev = DEVICE_CLASS(klass); + kdev->bus_type = TYPE_VMBUS; + kdev->realize = vmbus_dev_realize; + kdev->unrealize = vmbus_dev_unrealize; + kdev->reset = vmbus_dev_reset; +} + +static Property vmbus_dev_instanceid = + DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid); + +static void vmbus_dev_instance_init(Object *obj) +{ + VMBusDevice *vdev = VMBUS_DEVICE(obj); + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); + + if (!qemu_uuid_is_null(&vdc->instanceid)) { + /* Class wants to only have a single instance with a fixed UUID */ + vdev->instanceid = vdc->instanceid; + } else { + qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid); + } +} + +const VMStateDescription vmstate_vmbus_dev = { + .name = TYPE_VMBUS_DEVICE, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16), + VMSTATE_UINT16(num_channels, VMBusDevice), + VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice, + num_channels, vmstate_channel, + VMBusChannel), + VMSTATE_END_OF_LIST() + } +}; + +/* vmbus generic device base */ +static const TypeInfo vmbus_dev_type_info = { + .name = TYPE_VMBUS_DEVICE, + .parent = TYPE_DEVICE, + .abstract = true, + .instance_size = sizeof(VMBusDevice), + .class_size = sizeof(VMBusDeviceClass), + .class_init = vmbus_dev_class_init, + .instance_init = vmbus_dev_instance_init, +}; + +static void vmbus_realize(BusState *bus, Error **errp) +{ + int ret = 0; + Error *local_err = NULL; + VMBus *vmbus = VMBUS(bus); + + qemu_mutex_init(&vmbus->rx_queue_lock); + + QTAILQ_INIT(&vmbus->gpadl_list); + QTAILQ_INIT(&vmbus->channel_list); + + ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, + vmbus_recv_message, vmbus); + if (ret != 0) { + error_setg(&local_err, "hyperv set message handler failed: %d", ret); + goto error_out; + } + + ret = event_notifier_init(&vmbus->notifier, 0); + if (ret != 0) { + error_setg(&local_err, "event notifier failed to init with %d", ret); + goto remove_msg_handler; + } + + event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event); + ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, + &vmbus->notifier); + if (ret != 0) { + error_setg(&local_err, "hyperv set event handler failed with %d", ret); + goto clear_event_notifier; + } + + return; + +clear_event_notifier: + event_notifier_cleanup(&vmbus->notifier); +remove_msg_handler: + hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); +error_out: + qemu_mutex_destroy(&vmbus->rx_queue_lock); + error_propagate(errp, local_err); +} + +static void vmbus_unrealize(BusState *bus) +{ + VMBus *vmbus = VMBUS(bus); + + hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); + hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL); + event_notifier_cleanup(&vmbus->notifier); + + qemu_mutex_destroy(&vmbus->rx_queue_lock); +} + +static void vmbus_reset(BusState *bus) +{ + vmbus_deinit(VMBUS(bus)); +} + +static char *vmbus_get_dev_path(DeviceState *dev) +{ + BusState *bus = qdev_get_parent_bus(dev); + return qdev_get_dev_path(bus->parent); +} + +static char *vmbus_get_fw_dev_path(DeviceState *dev) +{ + VMBusDevice *vdev = VMBUS_DEVICE(dev); + char uuid[UUID_FMT_LEN + 1]; + + qemu_uuid_unparse(&vdev->instanceid, uuid); + return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); +} + +static void vmbus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + k->get_dev_path = vmbus_get_dev_path; + k->get_fw_dev_path = vmbus_get_fw_dev_path; + k->realize = vmbus_realize; + k->unrealize = vmbus_unrealize; + k->reset = vmbus_reset; +} + +static int vmbus_pre_load(void *opaque) +{ + VMBusChannel *chan; + VMBus *vmbus = VMBUS(opaque); + + /* + * channel IDs allocated by the source will come in the migration stream + * for each channel, so clean up the ones allocated at realize + */ + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + unregister_chan_id(chan); + } + + return 0; +} +static int vmbus_post_load(void *opaque, int version_id) +{ + int ret; + VMBus *vmbus = VMBUS(opaque); + VMBusGpadl *gpadl; + VMBusChannel *chan; + + ret = vmbus_init(vmbus); + if (ret) { + return ret; + } + + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { + gpadl->vmbus = vmbus; + gpadl->refcount = 1; + } + + /* + * reopening channels depends on initialized vmbus so it's done here + * instead of channel_post_load() + */ + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { + + if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) { + open_channel(chan); + } + + if (chan->state != VMCHAN_OPEN) { + continue; + } + + if (!vmbus_channel_is_open(chan)) { + /* reopen failed, abort loading */ + return -1; + } + + /* resume processing on the guest side if it missed the notification */ + hyperv_sint_route_set_sint(chan->notify_route); + /* ditto on the host side */ + vmbus_channel_notify_host(chan); + } + + vmbus_resched(vmbus); + return 0; +} + +static const VMStateDescription vmstate_post_message_input = { + .name = "vmbus/hyperv_post_message_input", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + /* + * skip connection_id and message_type as they are validated before + * queueing and ignored on dequeueing + */ + VMSTATE_UINT32(payload_size, struct hyperv_post_message_input), + VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input, + HV_MESSAGE_PAYLOAD_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmbus_rx_queue_needed(void *opaque) +{ + VMBus *vmbus = VMBUS(opaque); + return vmbus->rx_queue_size; +} + +static const VMStateDescription vmstate_rx_queue = { + .name = "vmbus/rx_queue", + .version_id = 0, + .minimum_version_id = 0, + .needed = vmbus_rx_queue_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(rx_queue_head, VMBus), + VMSTATE_UINT8(rx_queue_size, VMBus), + VMSTATE_STRUCT_ARRAY(rx_queue, VMBus, + HV_MSG_QUEUE_LEN, 0, + vmstate_post_message_input, + struct hyperv_post_message_input), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_vmbus = { + .name = TYPE_VMBUS, + .version_id = 0, + .minimum_version_id = 0, + .pre_load = vmbus_pre_load, + .post_load = vmbus_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8(state, VMBus), + VMSTATE_UINT32(version, VMBus), + VMSTATE_UINT32(target_vp, VMBus), + VMSTATE_UINT64(int_page_gpa, VMBus), + VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0, + vmstate_gpadl, VMBusGpadl, link), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_rx_queue, + NULL + } +}; + +static const TypeInfo vmbus_type_info = { + .name = TYPE_VMBUS, + .parent = TYPE_BUS, + .instance_size = sizeof(VMBus), + .class_init = vmbus_class_init, +}; + +static void vmbus_bridge_realize(DeviceState *dev, Error **errp) +{ + VMBusBridge *bridge = VMBUS_BRIDGE(dev); + + /* + * here there's at least one vmbus bridge that is being realized, so + * vmbus_bridge_find can only return NULL if it's not unique + */ + if (!vmbus_bridge_find()) { + error_setg(errp, "there can be at most one %s in the system", + TYPE_VMBUS_BRIDGE); + return; + } + + if (!hyperv_is_synic_enabled()) { + error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX"); + return; + } + + bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus")); +} + +static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) +{ + /* there can be only one VMBus */ + return g_strdup("0"); +} + +static const VMStateDescription vmstate_vmbus_bridge = { + .name = TYPE_VMBUS_BRIDGE, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmbus_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass); + + k->realize = vmbus_bridge_realize; + k->fw_name = "vmbus"; + sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address; + set_bit(DEVICE_CATEGORY_BRIDGE, k->categories); + k->vmsd = &vmstate_vmbus_bridge; + /* override SysBusDevice's default */ + k->user_creatable = true; +} + +static const TypeInfo vmbus_bridge_type_info = { + .name = TYPE_VMBUS_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VMBusBridge), + .class_init = vmbus_bridge_class_init, +}; + +static void vmbus_register_types(void) +{ + type_register_static(&vmbus_bridge_type_info); + type_register_static(&vmbus_dev_type_info); + type_register_static(&vmbus_type_info); +} + +type_init(vmbus_register_types) diff --git a/include/hw/hyperv/vmbus-bridge.h b/include/hw/hyperv/vmbus-bridge.h new file mode 100644 index 0000000000..9cc8f780de --- /dev/null +++ b/include/hw/hyperv/vmbus-bridge.h @@ -0,0 +1,32 @@ +/* + * QEMU Hyper-V VMBus root bridge + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_BRIDGE_H +#define HW_HYPERV_VMBUS_BRIDGE_H + +#include "hw/sysbus.h" + +#define TYPE_VMBUS_BRIDGE "vmbus-bridge" + +typedef struct VMBus VMBus; + +typedef struct VMBusBridge { + SysBusDevice parent_obj; + + VMBus *bus; +} VMBusBridge; + +#define VMBUS_BRIDGE(obj) OBJECT_CHECK(VMBusBridge, (obj), TYPE_VMBUS_BRIDGE) + +static inline VMBusBridge *vmbus_bridge_find(void) +{ + return VMBUS_BRIDGE(object_resolve_path_type("", TYPE_VMBUS_BRIDGE, NULL)); +} + +#endif diff --git a/include/hw/hyperv/vmbus.h b/include/hw/hyperv/vmbus.h new file mode 100644 index 0000000000..99b647e1d6 --- /dev/null +++ b/include/hw/hyperv/vmbus.h @@ -0,0 +1,227 @@ +/* + * QEMU Hyper-V VMBus + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_VMBUS_H +#define HW_HYPERV_VMBUS_H + +#include "sysemu/sysemu.h" +#include "sysemu/dma.h" +#include "hw/qdev-core.h" +#include "migration/vmstate.h" +#include "hw/hyperv/vmbus-proto.h" +#include "qemu/uuid.h" + +#define TYPE_VMBUS_DEVICE "vmbus-dev" + +#define VMBUS_DEVICE(obj) \ + OBJECT_CHECK(VMBusDevice, (obj), TYPE_VMBUS_DEVICE) +#define VMBUS_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VMBusDeviceClass, (klass), TYPE_VMBUS_DEVICE) +#define VMBUS_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VMBusDeviceClass, (obj), TYPE_VMBUS_DEVICE) + +/* + * Object wrapping a GPADL -- GPA Descriptor List -- an array of guest physical + * pages, to be used for various buffers shared between the host and the guest. + */ +typedef struct VMBusGpadl VMBusGpadl; +/* + * VMBus channel -- a pair of ring buffers for either direction, placed within + * one GPADL, and the associated notification means. + */ +typedef struct VMBusChannel VMBusChannel; +/* + * Base class for VMBus devices. Includes one or more channels. Identified by + * class GUID and instance GUID. + */ +typedef struct VMBusDevice VMBusDevice; + +typedef void(*VMBusChannelNotifyCb)(struct VMBusChannel *chan); + +typedef struct VMBusDeviceClass { + DeviceClass parent; + + QemuUUID classid; + QemuUUID instanceid; /* Fixed UUID for singleton devices */ + uint16_t channel_flags; + uint16_t mmio_size_mb; + + /* Extentions to standard device callbacks */ + void (*vmdev_realize)(VMBusDevice *vdev, Error **errp); + void (*vmdev_unrealize)(VMBusDevice *vdev); + void (*vmdev_reset)(VMBusDevice *vdev); + /* + * Calculate the number of channels based on the device properties. Called + * at realize time. + **/ + uint16_t (*num_channels)(VMBusDevice *vdev); + /* + * Device-specific actions to complete the otherwise successful process of + * opening a channel. + * Return 0 on success, -errno on failure. + */ + int (*open_channel)(VMBusChannel *chan); + /* + * Device-specific actions to perform before closing a channel. + */ + void (*close_channel)(VMBusChannel *chan); + /* + * Main device worker; invoked in response to notifications from either + * side, when there's work to do with the data in the channel ring buffers. + */ + VMBusChannelNotifyCb chan_notify_cb; +} VMBusDeviceClass; + +struct VMBusDevice { + DeviceState parent; + QemuUUID instanceid; + uint16_t num_channels; + VMBusChannel *channels; + AddressSpace *dma_as; +}; + +extern const VMStateDescription vmstate_vmbus_dev; + +/* + * A unit of work parsed out of a message in the receive (i.e. guest->host) + * ring buffer of a channel. It's supposed to be subclassed (through + * embedding) by the specific devices. + */ +typedef struct VMBusChanReq { + VMBusChannel *chan; + uint16_t pkt_type; + uint32_t msglen; + void *msg; + uint64_t transaction_id; + bool need_comp; + QEMUSGList sgl; +} VMBusChanReq; + +VMBusDevice *vmbus_channel_device(VMBusChannel *chan); +VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx); +uint32_t vmbus_channel_idx(VMBusChannel *chan); +bool vmbus_channel_is_open(VMBusChannel *chan); + +/* + * Notify (on guest's behalf) the host side of the channel that there's data in + * the ringbuffer to process. + */ +void vmbus_channel_notify_host(VMBusChannel *chan); + +/* + * Reserve space for a packet in the send (i.e. host->guest) ringbuffer. If + * there isn't enough room, indicate that to the guest, to be notified when it + * becomes available. + * Return 0 on success, negative errno on failure. + * The ringbuffer indices are NOT updated, the requested space indicator may. + */ +int vmbus_channel_reserve(VMBusChannel *chan, + uint32_t desclen, uint32_t msglen); + +/* + * Send a packet to the guest. The space for the packet MUST be reserved + * first. + * Return total number of bytes placed in the send ringbuffer on success, + * negative errno on failure. + * The ringbuffer indices are updated on success, and the guest is signaled if + * needed. + */ +ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, + void *desc, uint32_t desclen, + void *msg, uint32_t msglen, + bool need_comp, uint64_t transaction_id); + +/* + * Prepare to fetch a batch of packets from the receive ring buffer. + * Return 0 on success, negative errno on failure. + */ +int vmbus_channel_recv_start(VMBusChannel *chan); + +/* + * Shortcut for a common case of sending a simple completion packet with no + * auxiliary descriptors. + */ +ssize_t vmbus_channel_send_completion(VMBusChanReq *req, + void *msg, uint32_t msglen); + +/* + * Peek at the receive (i.e. guest->host) ring buffer and extract a unit of + * work (a device-specific subclass of VMBusChanReq) from a packet if there's + * one. + * Return an allocated buffer, containing the request of @size with filled + * VMBusChanReq at the beginning, followed by the message payload, or NULL on + * failure. + * The ringbuffer indices are NOT updated, nor is the private copy of the read + * index. + */ +void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size); + +/* + * Update the private copy of the read index once the preceding peek is deemed + * successful. + * The ringbuffer indices are NOT updated. + */ +void vmbus_channel_recv_pop(VMBusChannel *chan); + +/* + * Propagate the private copy of the read index into the receive ring buffer, + * and thus complete the reception of a series of packets. Notify guest if + * needed. + * Return the number of bytes popped off the receive ring buffer by the + * preceding recv_peek/recv_pop calls on success, negative errno on failure. + */ +ssize_t vmbus_channel_recv_done(VMBusChannel *chan); + +/* + * Free the request allocated by vmbus_channel_recv_peek, together with its + * fields. + */ +void vmbus_free_req(void *req); + +/* + * Find and reference a GPADL by @gpadl_id. + * If not found return NULL. + */ +VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id); + +/* + * Unreference @gpadl. If the reference count drops to zero, free it. + * @gpadl may be NULL, in which case nothing is done. + */ +void vmbus_put_gpadl(VMBusGpadl *gpadl); + +/* + * Calculate total length in bytes of @gpadl. + * @gpadl must be valid. + */ +uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl); + +/* + * Copy data from @iov to @gpadl at offset @off. + * Return the number of bytes copied, or a negative status on failure. + */ +ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, + const struct iovec *iov, size_t iov_cnt); + +/* + * Map SGList contained in the request @req, at offset @off and no more than + * @len bytes, for io in direction @dir, and populate @iov with the mapped + * iovecs. + * Return the number of iovecs mapped, or negative status on failure. + */ +int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t len, size_t off); + +/* + * Unmap *iov mapped with vmbus_map_sgl, marking the number of bytes @accessed. + */ +void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, + unsigned iov_cnt, size_t accessed); + +#endif -- cgit v1.2.3-55-g7522 From 6775d15de18268718c2f971c2b2d255c76ff2240 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 15:34:43 +0300 Subject: i386: Hyper-V VMBus ACPI DSDT entry Guest OS uses ACPI to discover VMBus presence. Add a corresponding entry to DSDT in case VMBus has been enabled. Experimentally Windows guests were found to require this entry to include two IRQ resources. They seem to never be used but they still have to be there. Make IRQ numbers user-configurable via corresponding properties; use 7 and 13 by default. Signed-off-by: Evgeny Yakovlev Signed-off-by: Roman Kagan Signed-off-by: Maciej S. Szmigiero Signed-off-by: Jon Doron Message-Id: <20200424123444.3481728-6-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/vmbus.c | 7 +++++++ hw/i386/acpi-build.c | 43 ++++++++++++++++++++++++++++++++++++++++ include/hw/hyperv/vmbus-bridge.h | 3 +++ 3 files changed, 53 insertions(+) (limited to 'include') diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 802bbc8c96..2acb2185e8 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -2641,6 +2641,12 @@ static const VMStateDescription vmstate_vmbus_bridge = { }, }; +static Property vmbus_bridge_props[] = { + DEFINE_PROP_UINT8("irq0", VMBusBridge, irq0, 7), + DEFINE_PROP_UINT8("irq1", VMBusBridge, irq1, 13), + DEFINE_PROP_END_OF_LIST() +}; + static void vmbus_bridge_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); @@ -2651,6 +2657,7 @@ static void vmbus_bridge_class_init(ObjectClass *klass, void *data) sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address; set_bit(DEVICE_CATEGORY_BRIDGE, k->categories); k->vmsd = &vmstate_vmbus_bridge; + device_class_set_props(k, vmbus_bridge_props); /* override SysBusDevice's default */ k->user_creatable = true; } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 2e15f6848e..dcdfbd8906 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -50,6 +50,7 @@ #include "hw/mem/nvdimm.h" #include "sysemu/numa.h" #include "sysemu/reset.h" +#include "hw/hyperv/vmbus-bridge.h" /* Supported chipsets: */ #include "hw/southbridge/piix.h" @@ -1270,9 +1271,47 @@ static Aml *build_com_device_aml(uint8_t uid) return dev; } +static Aml *build_vmbus_device_aml(VMBusBridge *vmbus_bridge) +{ + Aml *dev; + Aml *method; + Aml *crs; + + dev = aml_device("VMBS"); + aml_append(dev, aml_name_decl("STA", aml_int(0xF))); + aml_append(dev, aml_name_decl("_HID", aml_string("VMBus"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0x0))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VMBUS"))); + + method = aml_method("_DIS", 0, AML_NOTSERIALIZED); + aml_append(method, aml_store(aml_and(aml_name("STA"), aml_int(0xD), NULL), + aml_name("STA"))); + aml_append(dev, method); + + method = aml_method("_PS0", 0, AML_NOTSERIALIZED); + aml_append(method, aml_store(aml_or(aml_name("STA"), aml_int(0xF), NULL), + aml_name("STA"))); + aml_append(dev, method); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_name("STA"))); + aml_append(dev, method); + + aml_append(dev, aml_name_decl("_PS3", aml_int(0x0))); + + crs = aml_resource_template(); + aml_append(crs, aml_irq_no_flags(vmbus_bridge->irq0)); + /* FIXME: newer HyperV gets by with only one IRQ */ + aml_append(crs, aml_irq_no_flags(vmbus_bridge->irq1)); + aml_append(dev, aml_name_decl("_CRS", crs)); + + return dev; +} + static void build_isa_devices_aml(Aml *table) { ISADevice *fdc = pc_find_fdc0(); + VMBusBridge *vmbus_bridge = vmbus_bridge_find(); bool ambiguous; Aml *scope = aml_scope("_SB.PCI0.ISA"); @@ -1297,6 +1336,10 @@ static void build_isa_devices_aml(Aml *table) isa_build_aml(ISA_BUS(obj), scope); } + if (vmbus_bridge) { + aml_append(scope, build_vmbus_device_aml(vmbus_bridge)); + } + aml_append(table, scope); } diff --git a/include/hw/hyperv/vmbus-bridge.h b/include/hw/hyperv/vmbus-bridge.h index 9cc8f780de..c0a06d832c 100644 --- a/include/hw/hyperv/vmbus-bridge.h +++ b/include/hw/hyperv/vmbus-bridge.h @@ -19,6 +19,9 @@ typedef struct VMBus VMBus; typedef struct VMBusBridge { SysBusDevice parent_obj; + uint8_t irq0; + uint8_t irq1; + VMBus *bus; } VMBusBridge; -- cgit v1.2.3-55-g7522 From 4dd8a7064b8a6527f99a62be11a5124e65cae270 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Fri, 24 Apr 2020 15:34:44 +0300 Subject: vmbus: add infrastructure to save/load vmbus requests This can be allow to include controller-specific data while saving/loading in-flight scsi requests of the vmbus scsi controller. Signed-off-by: Roman Kagan Signed-off-by: Maciej S. Szmigiero Signed-off-by: Jon Doron Message-Id: <20200424123444.3481728-7-arilou@gmail.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/vmbus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++ include/hw/hyperv/vmbus.h | 3 ++ 2 files changed, 102 insertions(+) (limited to 'include') diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 2acb2185e8..f371240176 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -1272,6 +1272,105 @@ void vmbus_free_req(void *req) g_free(req); } +static const VMStateDescription vmstate_sgent = { + .name = "vmbus/sgentry", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT64(base, ScatterGatherEntry), + VMSTATE_UINT64(len, ScatterGatherEntry), + VMSTATE_END_OF_LIST() + } +}; + +typedef struct VMBusChanReqSave { + uint16_t chan_idx; + uint16_t pkt_type; + uint32_t msglen; + void *msg; + uint64_t transaction_id; + bool need_comp; + uint32_t num; + ScatterGatherEntry *sgl; +} VMBusChanReqSave; + +static const VMStateDescription vmstate_vmbus_chan_req = { + .name = "vmbus/vmbus_chan_req", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT16(chan_idx, VMBusChanReqSave), + VMSTATE_UINT16(pkt_type, VMBusChanReqSave), + VMSTATE_UINT32(msglen, VMBusChanReqSave), + VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen), + VMSTATE_UINT64(transaction_id, VMBusChanReqSave), + VMSTATE_BOOL(need_comp, VMBusChanReqSave), + VMSTATE_UINT32(num, VMBusChanReqSave), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num, + vmstate_sgent, ScatterGatherEntry), + VMSTATE_END_OF_LIST() + } +}; + +void vmbus_save_req(QEMUFile *f, VMBusChanReq *req) +{ + VMBusChanReqSave req_save; + + req_save.chan_idx = req->chan->subchan_idx; + req_save.pkt_type = req->pkt_type; + req_save.msglen = req->msglen; + req_save.msg = req->msg; + req_save.transaction_id = req->transaction_id; + req_save.need_comp = req->need_comp; + req_save.num = req->sgl.nsg; + req_save.sgl = g_memdup(req->sgl.sg, + req_save.num * sizeof(ScatterGatherEntry)); + + vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL); + + g_free(req_save.sgl); +} + +void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size) +{ + VMBusChanReqSave req_save; + VMBusChanReq *req = NULL; + VMBusChannel *chan = NULL; + uint32_t i; + + vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0); + + if (req_save.chan_idx >= dev->num_channels) { + error_report("%s: %u(chan_idx) > %u(num_channels)", __func__, + req_save.chan_idx, dev->num_channels); + goto out; + } + chan = &dev->channels[req_save.chan_idx]; + + if (vmbus_channel_reserve(chan, 0, req_save.msglen)) { + goto out; + } + + req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen, + req_save.transaction_id, req_save.need_comp); + if (req_save.msglen) { + memcpy(req->msg, req_save.msg, req_save.msglen); + } + + for (i = 0; i < req_save.num; i++) { + qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len); + } + +out: + if (req_save.msglen) { + g_free(req_save.msg); + } + if (req_save.num) { + g_free(req_save.sgl); + } + return req; +} + static void channel_event_cb(EventNotifier *e) { VMBusChannel *chan = container_of(e, VMBusChannel, notifier); diff --git a/include/hw/hyperv/vmbus.h b/include/hw/hyperv/vmbus.h index 99b647e1d6..40e8417eec 100644 --- a/include/hw/hyperv/vmbus.h +++ b/include/hw/hyperv/vmbus.h @@ -224,4 +224,7 @@ int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, unsigned iov_cnt, size_t accessed); +void vmbus_save_req(QEMUFile *f, VMBusChanReq *req); +void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size); + #endif -- cgit v1.2.3-55-g7522 From d8f23d619c495bc64977c00e92f3af4ff2c54046 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 12 Mar 2020 18:54:22 +0200 Subject: hw/i386/vmport: Introduce vmport.h No functional change. This is mere refactoring. Suggested-by: Michael S. Tsirkin Signed-off-by: Liran Alon Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20200312165431.82118-8-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 2 +- hw/i386/vmmouse.c | 2 +- hw/i386/vmport.c | 2 +- hw/i386/vmport.h | 34 ---------------------------------- include/hw/i386/vmport.h | 16 ++++++++++++++++ 5 files changed, 19 insertions(+), 37 deletions(-) delete mode 100644 hw/i386/vmport.h create mode 100644 include/hw/i386/vmport.h (limited to 'include') diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2128f3d6fe..b9961b1035 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -31,6 +31,7 @@ #include "hw/i386/apic.h" #include "hw/i386/topology.h" #include "hw/i386/fw_cfg.h" +#include "hw/i386/vmport.h" #include "sysemu/cpus.h" #include "hw/block/fdc.h" #include "hw/ide.h" @@ -91,7 +92,6 @@ #include "qapi/qmp/qerror.h" #include "config-devices.h" #include "e820_memory_layout.h" -#include "vmport.h" #include "fw_cfg.h" #include "trace.h" diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index b3aef41327..b73f076753 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -25,10 +25,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "ui/console.h" +#include "hw/i386/vmport.h" #include "hw/input/i8042.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" -#include "vmport.h" #include "cpu.h" /* debug only vmmouse */ diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index dabb443367..e6d169566d 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -30,10 +30,10 @@ #include "qemu/osdep.h" #include "hw/isa/isa.h" +#include "hw/i386/vmport.h" #include "hw/qdev-properties.h" #include "sysemu/hw_accel.h" #include "qemu/log.h" -#include "vmport.h" #include "cpu.h" #include "trace.h" diff --git a/hw/i386/vmport.h b/hw/i386/vmport.h deleted file mode 100644 index 47eda7a22b..0000000000 --- a/hw/i386/vmport.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * QEMU VMPort emulation - * - * Copyright (C) 2007 Hervé Poussineau - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#ifndef HW_I386_VMPORT_H -#define HW_I386_VMPORT_H - -#define TYPE_VMPORT "vmport" - -typedef uint32_t (VMPortReadFunc)(void *opaque, uint32_t address); - -void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque); - -#endif diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h new file mode 100644 index 0000000000..efbe3e0e3f --- /dev/null +++ b/include/hw/i386/vmport.h @@ -0,0 +1,16 @@ +#ifndef HW_VMPORT_H +#define HW_VMPORT_H + +#include "hw/isa/isa.h" + +#define TYPE_VMPORT "vmport" +typedef uint32_t (VMPortReadFunc)(void *opaque, uint32_t address); + +static inline void vmport_init(ISABus *bus) +{ + isa_create_simple(bus, TYPE_VMPORT); +} + +void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque); + +#endif -- cgit v1.2.3-55-g7522 From dcd938f032d3cca5d33d9faaca591b498d40debe Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 12 Mar 2020 18:54:23 +0200 Subject: hw/i386/vmport: Define enum for all commands No functional change. Defining an enum for all VMPort commands have the following advantages: * It gets rid of the error-prone requirement to update VMPORT_ENTRIES when new VMPort commands are added to QEMU. * It makes it clear to know by looking at one place at the source, what are all the VMPort commands supported by QEMU. Reviewed-by: Nikita Leshenko Signed-off-by: Liran Alon Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20200312165431.82118-9-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- hw/i386/vmmouse.c | 18 ++++++------------ hw/i386/vmport.c | 11 ++--------- include/hw/i386/vmport.h | 11 ++++++++++- 3 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index b73f076753..ba5c987bd2 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -34,12 +34,6 @@ /* debug only vmmouse */ //#define DEBUG_VMMOUSE -/* VMMouse Commands */ -#define VMMOUSE_GETVERSION 10 -#define VMMOUSE_DATA 39 -#define VMMOUSE_STATUS 40 -#define VMMOUSE_COMMAND 41 - #define VMMOUSE_READ_ID 0x45414552 #define VMMOUSE_DISABLE 0x000000f5 #define VMMOUSE_REQUEST_RELATIVE 0x4c455252 @@ -217,10 +211,10 @@ static uint32_t vmmouse_ioport_read(void *opaque, uint32_t addr) command = data[2] & 0xFFFF; switch (command) { - case VMMOUSE_STATUS: + case VMPORT_CMD_VMMOUSE_STATUS: data[0] = vmmouse_get_status(s); break; - case VMMOUSE_COMMAND: + case VMPORT_CMD_VMMOUSE_COMMAND: switch (data[1]) { case VMMOUSE_DISABLE: vmmouse_disable(s); @@ -239,7 +233,7 @@ static uint32_t vmmouse_ioport_read(void *opaque, uint32_t addr) break; } break; - case VMMOUSE_DATA: + case VMPORT_CMD_VMMOUSE_DATA: vmmouse_data(s, data, data[1]); break; default: @@ -296,9 +290,9 @@ static void vmmouse_realizefn(DeviceState *dev, Error **errp) return; } - vmport_register(VMMOUSE_STATUS, vmmouse_ioport_read, s); - vmport_register(VMMOUSE_COMMAND, vmmouse_ioport_read, s); - vmport_register(VMMOUSE_DATA, vmmouse_ioport_read, s); + vmport_register(VMPORT_CMD_VMMOUSE_STATUS, vmmouse_ioport_read, s); + vmport_register(VMPORT_CMD_VMMOUSE_COMMAND, vmmouse_ioport_read, s); + vmport_register(VMPORT_CMD_VMMOUSE_DATA, vmmouse_ioport_read, s); } static Property vmmouse_properties[] = { diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index e6d169566d..359cdef1e0 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -37,10 +37,6 @@ #include "cpu.h" #include "trace.h" -#define VMPORT_CMD_GETVERSION 0x0a -#define VMPORT_CMD_GETRAMSIZE 0x14 - -#define VMPORT_ENTRIES 0x2c #define VMPORT_MAGIC 0x564D5868 /* Compatibility flags for migration */ @@ -71,12 +67,9 @@ typedef struct VMPortState { static VMPortState *port_state; -void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque) +void vmport_register(VMPortCommand command, VMPortReadFunc *func, void *opaque) { - if (command >= VMPORT_ENTRIES) { - return; - } - + assert(command < VMPORT_ENTRIES); trace_vmport_register(command, func, opaque); port_state->func[command] = func; port_state->opaque[command] = opaque; diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h index efbe3e0e3f..bbe2c58bf9 100644 --- a/include/hw/i386/vmport.h +++ b/include/hw/i386/vmport.h @@ -6,11 +6,20 @@ #define TYPE_VMPORT "vmport" typedef uint32_t (VMPortReadFunc)(void *opaque, uint32_t address); +typedef enum { + VMPORT_CMD_GETVERSION = 10, + VMPORT_CMD_GETRAMSIZE = 20, + VMPORT_CMD_VMMOUSE_DATA = 39, + VMPORT_CMD_VMMOUSE_STATUS = 40, + VMPORT_CMD_VMMOUSE_COMMAND = 41, + VMPORT_ENTRIES +} VMPortCommand; + static inline void vmport_init(ISABus *bus) { isa_create_simple(bus, TYPE_VMPORT); } -void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque); +void vmport_register(VMPortCommand command, VMPortReadFunc *func, void *opaque); #endif -- cgit v1.2.3-55-g7522 From aaacf1c15a225ffeb1ff066b78e211594b3a5053 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 12 Mar 2020 18:54:24 +0200 Subject: hw/i386/vmport: Add support for CMD_GETBIOSUUID This is VMware documented functionallity that some guests rely on. Returns the BIOS UUID of the current virtual machine. Note that we also introduce a new compatability flag "x-cmds-v2" to make sure to expose new VMPort commands only to new machine-types. This flag will also be used by the following patches that will introduce additional VMPort commands. Reviewed-by: Nikita Leshenko Signed-off-by: Liran Alon Message-Id: <20200312165431.82118-10-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- hw/core/machine.c | 1 + hw/i386/vmport.c | 22 ++++++++++++++++++++++ include/hw/i386/vmport.h | 1 + 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/hw/core/machine.c b/hw/core/machine.c index 9a07e9333a..5460e62294 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -46,6 +46,7 @@ GlobalProperty hw_compat_4_2[] = { { "vmport", "x-read-set-eax", "off" }, { "vmport", "x-signal-unsupported-cmd", "off" }, { "vmport", "x-report-vmx-type", "off" }, + { "vmport", "x-cmds-v2", "off" }, }; const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2); diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index 359cdef1e0..8006ff91d4 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -32,6 +32,7 @@ #include "hw/isa/isa.h" #include "hw/i386/vmport.h" #include "hw/qdev-properties.h" +#include "sysemu/sysemu.h" #include "sysemu/hw_accel.h" #include "qemu/log.h" #include "cpu.h" @@ -43,12 +44,15 @@ #define VMPORT_COMPAT_READ_SET_EAX_BIT 0 #define VMPORT_COMPAT_SIGNAL_UNSUPPORTED_CMD_BIT 1 #define VMPORT_COMPAT_REPORT_VMX_TYPE_BIT 2 +#define VMPORT_COMPAT_CMDS_V2_BIT 3 #define VMPORT_COMPAT_READ_SET_EAX \ (1 << VMPORT_COMPAT_READ_SET_EAX_BIT) #define VMPORT_COMPAT_SIGNAL_UNSUPPORTED_CMD \ (1 << VMPORT_COMPAT_SIGNAL_UNSUPPORTED_CMD_BIT) #define VMPORT_COMPAT_REPORT_VMX_TYPE \ (1 << VMPORT_COMPAT_REPORT_VMX_TYPE_BIT) +#define VMPORT_COMPAT_CMDS_V2 \ + (1 << VMPORT_COMPAT_CMDS_V2_BIT) #define VMPORT(obj) OBJECT_CHECK(VMPortState, (obj), TYPE_VMPORT) @@ -143,6 +147,18 @@ static uint32_t vmport_cmd_get_version(void *opaque, uint32_t addr) return port_state->vmware_vmx_version; } +static uint32_t vmport_cmd_get_bios_uuid(void *opaque, uint32_t addr) +{ + X86CPU *cpu = X86_CPU(current_cpu); + uint32_t *uuid_parts = (uint32_t *)(qemu_uuid.data); + + cpu->env.regs[R_EAX] = le32_to_cpu(uuid_parts[0]); + cpu->env.regs[R_EBX] = le32_to_cpu(uuid_parts[1]); + cpu->env.regs[R_ECX] = le32_to_cpu(uuid_parts[2]); + cpu->env.regs[R_EDX] = le32_to_cpu(uuid_parts[3]); + return cpu->env.regs[R_EAX]; +} + static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr) { X86CPU *cpu = X86_CPU(current_cpu); @@ -170,9 +186,13 @@ static void vmport_realizefn(DeviceState *dev, Error **errp) isa_register_ioport(isadev, &s->io, 0x5658); port_state = s; + /* Register some generic port commands */ vmport_register(VMPORT_CMD_GETVERSION, vmport_cmd_get_version, NULL); vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL); + if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) { + vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL); + } } static Property vmport_properties[] = { @@ -183,6 +203,8 @@ static Property vmport_properties[] = { VMPORT_COMPAT_SIGNAL_UNSUPPORTED_CMD_BIT, true), DEFINE_PROP_BIT("x-report-vmx-type", VMPortState, compat_flags, VMPORT_COMPAT_REPORT_VMX_TYPE_BIT, true), + DEFINE_PROP_BIT("x-cmds-v2", VMPortState, compat_flags, + VMPORT_COMPAT_CMDS_V2_BIT, true), /* Default value taken from open-vm-tools code VERSION_MAGIC definition */ DEFINE_PROP_UINT32("vmware-vmx-version", VMPortState, diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h index bbe2c58bf9..7f566ccc02 100644 --- a/include/hw/i386/vmport.h +++ b/include/hw/i386/vmport.h @@ -8,6 +8,7 @@ typedef uint32_t (VMPortReadFunc)(void *opaque, uint32_t address); typedef enum { VMPORT_CMD_GETVERSION = 10, + VMPORT_CMD_GETBIOSUUID = 19, VMPORT_CMD_GETRAMSIZE = 20, VMPORT_CMD_VMMOUSE_DATA = 39, VMPORT_CMD_VMMOUSE_STATUS = 40, -- cgit v1.2.3-55-g7522 From acacd3550ba1b2379d5a14a00f7dad4820a015b4 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 12 Mar 2020 18:54:27 +0200 Subject: hw/i386/vmport: Add support for CMD_GET_VCPU_INFO Command currently returns that it is unimplemented by setting the reserved-bit in it's return value. Following patches will return various useful vCPU information to guest. Reviewed-by: Nikita Leshenko Signed-off-by: Liran Alon Message-Id: <20200312165431.82118-13-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- hw/i386/vmport.c | 14 ++++++++++++++ include/hw/i386/vmport.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index 8006ff91d4..942a0e94e3 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -54,6 +54,13 @@ #define VMPORT_COMPAT_CMDS_V2 \ (1 << VMPORT_COMPAT_CMDS_V2_BIT) +/* vCPU features reported by CMD_GET_VCPU_INFO */ +#define VCPU_INFO_SLC64_BIT 0 +#define VCPU_INFO_SYNC_VTSCS_BIT 1 +#define VCPU_INFO_HV_REPLAY_OK_BIT 2 +#define VCPU_INFO_LEGACY_X2APIC_BIT 3 +#define VCPU_INFO_RESERVED_BIT 31 + #define VMPORT(obj) OBJECT_CHECK(VMPortState, (obj), TYPE_VMPORT) typedef struct VMPortState { @@ -167,6 +174,11 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr) return ram_size; } +static uint32_t vmport_cmd_get_vcpu_info(void *opaque, uint32_t addr) +{ + return 1 << VCPU_INFO_RESERVED_BIT; +} + static const MemoryRegionOps vmport_ops = { .read = vmport_ioport_read, .write = vmport_ioport_write, @@ -192,6 +204,8 @@ static void vmport_realizefn(DeviceState *dev, Error **errp) vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL); if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) { vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL); + vmport_register(VMPORT_CMD_GET_VCPU_INFO, vmport_cmd_get_vcpu_info, + NULL); } } diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h index 7f566ccc02..7656432358 100644 --- a/include/hw/i386/vmport.h +++ b/include/hw/i386/vmport.h @@ -13,6 +13,7 @@ typedef enum { VMPORT_CMD_VMMOUSE_DATA = 39, VMPORT_CMD_VMMOUSE_STATUS = 40, VMPORT_CMD_VMMOUSE_COMMAND = 41, + VMPORT_CMD_GET_VCPU_INFO = 68, VMPORT_ENTRIES } VMPortCommand; -- cgit v1.2.3-55-g7522 From d6048bfd12e24a0980ba2040cfaa2b101df3fa16 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 12 Mar 2020 18:54:30 +0200 Subject: hw/i386/vmport: Add support for CMD_GETHZ This command returns to guest information on LAPIC bus frequency and TSC frequency. One can see how this interface is used by Linux vmware_platform_setup() introduced in Linux commit 88b094fb8d4f ("x86: Hypervisor detection and get tsc_freq from hypervisor"). Reviewed-by: Nikita Leshenko Signed-off-by: Liran Alon Message-Id: <20200312165431.82118-16-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- hw/i386/vmport.c | 19 +++++++++++++++++++ include/hw/i386/vmport.h | 1 + 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index 21d4ff048a..309cfd105b 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -174,6 +174,24 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr) return ram_size; } +static uint32_t vmport_cmd_get_hz(void *opaque, uint32_t addr) +{ + X86CPU *cpu = X86_CPU(current_cpu); + + if (cpu->env.tsc_khz && cpu->env.apic_bus_freq) { + uint64_t tsc_freq = (uint64_t)cpu->env.tsc_khz * 1000; + + cpu->env.regs[R_ECX] = cpu->env.apic_bus_freq; + cpu->env.regs[R_EBX] = (uint32_t)(tsc_freq >> 32); + cpu->env.regs[R_EAX] = (uint32_t)tsc_freq; + } else { + /* Signal cmd as not supported */ + cpu->env.regs[R_EBX] = UINT32_MAX; + } + + return cpu->env.regs[R_EAX]; +} + static uint32_t vmport_cmd_get_vcpu_info(void *opaque, uint32_t addr) { X86CPU *cpu = X86_CPU(current_cpu); @@ -211,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp) vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL); if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) { vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL); + vmport_register(VMPORT_CMD_GETHZ, vmport_cmd_get_hz, NULL); vmport_register(VMPORT_CMD_GET_VCPU_INFO, vmport_cmd_get_vcpu_info, NULL); } diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h index 7656432358..c380b9c1f0 100644 --- a/include/hw/i386/vmport.h +++ b/include/hw/i386/vmport.h @@ -13,6 +13,7 @@ typedef enum { VMPORT_CMD_VMMOUSE_DATA = 39, VMPORT_CMD_VMMOUSE_STATUS = 40, VMPORT_CMD_VMMOUSE_COMMAND = 41, + VMPORT_CMD_GETHZ = 45, VMPORT_CMD_GET_VCPU_INFO = 68, VMPORT_ENTRIES } VMPortCommand; -- cgit v1.2.3-55-g7522 From da278d58a092bfcc4e36f1e274229c1468dea731 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Fri, 8 May 2020 12:02:22 +0200 Subject: accel: Move Xen accelerator code under accel/xen/ This code is not related to hardware emulation. Move it under accel/ with the other hypervisors. Reviewed-by: Paul Durrant Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200508100222.7112-1-philmd@redhat.com> Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 + accel/Makefile.objs | 1 + accel/xen/Makefile.objs | 1 + accel/xen/xen-all.c | 227 +++++++++++++++++++++++++++++++++++++++++++++ hw/acpi/piix4.c | 2 +- hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + hw/i386/xen/xen-hvm.c | 1 + hw/i386/xen/xen_platform.c | 1 + hw/isa/piix3.c | 1 + hw/pci/msix.c | 1 + hw/xen/Makefile.objs | 2 +- hw/xen/xen-common.c | 219 ------------------------------------------- include/exec/ram_addr.h | 2 +- include/hw/xen/xen.h | 11 --- include/sysemu/xen.h | 38 ++++++++ migration/savevm.c | 2 +- softmmu/vl.c | 2 +- stubs/xen-hvm.c | 9 -- target/i386/cpu.c | 2 +- 21 files changed, 282 insertions(+), 245 deletions(-) create mode 100644 accel/xen/Makefile.objs create mode 100644 accel/xen/xen-all.c delete mode 100644 hw/xen/xen-common.c create mode 100644 include/sysemu/xen.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 3abe3faa4e..abe4d7ef8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -440,6 +440,7 @@ M: Paul Durrant L: xen-devel@lists.xenproject.org S: Supported F: */xen* +F: accel/xen/* F: hw/9pfs/xen-9p* F: hw/char/xen_console.c F: hw/display/xenfb.c @@ -453,6 +454,7 @@ F: hw/i386/xen/ F: hw/pci-host/xen_igd_pt.c F: include/hw/block/dataplane/xen* F: include/hw/xen/ +F: include/sysemu/xen.h F: include/sysemu/xen-mapcache.h Guest CPU Cores (HAXM) diff --git a/accel/Makefile.objs b/accel/Makefile.objs index 17e5ac6061..ff72f0d030 100644 --- a/accel/Makefile.objs +++ b/accel/Makefile.objs @@ -2,4 +2,5 @@ common-obj-$(CONFIG_SOFTMMU) += accel.o obj-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_POSIX)) += qtest.o obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_TCG) += tcg/ +obj-$(CONFIG_XEN) += xen/ obj-y += stubs/ diff --git a/accel/xen/Makefile.objs b/accel/xen/Makefile.objs new file mode 100644 index 0000000000..7482cfb436 --- /dev/null +++ b/accel/xen/Makefile.objs @@ -0,0 +1 @@ +obj-y += xen-all.o diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c new file mode 100644 index 0000000000..f3edc65ec9 --- /dev/null +++ b/accel/xen/xen-all.c @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2014 Citrix Systems UK Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qapi/error.h" +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_pt.h" +#include "chardev/char.h" +#include "sysemu/accel.h" +#include "sysemu/xen.h" +#include "sysemu/runstate.h" +#include "migration/misc.h" +#include "migration/global_state.h" +#include "hw/boards.h" + +//#define DEBUG_XEN + +#ifdef DEBUG_XEN +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +static bool xen_allowed; + +bool xen_enabled(void) +{ + return xen_allowed; +} + +xc_interface *xen_xc; +xenforeignmemory_handle *xen_fmem; +xendevicemodel_handle *xen_dmod; + +static int store_dev_info(int domid, Chardev *cs, const char *string) +{ + struct xs_handle *xs = NULL; + char *path = NULL; + char *newpath = NULL; + char *pts = NULL; + int ret = -1; + + /* Only continue if we're talking to a pty. */ + if (!CHARDEV_IS_PTY(cs)) { + return 0; + } + pts = cs->filename + 4; + + /* We now have everything we need to set the xenstore entry. */ + xs = xs_open(0); + if (xs == NULL) { + fprintf(stderr, "Could not contact XenStore\n"); + goto out; + } + + path = xs_get_domain_path(xs, domid); + if (path == NULL) { + fprintf(stderr, "xs_get_domain_path() error\n"); + goto out; + } + newpath = realloc(path, (strlen(path) + strlen(string) + + strlen("/tty") + 1)); + if (newpath == NULL) { + fprintf(stderr, "realloc error\n"); + goto out; + } + path = newpath; + + strcat(path, string); + strcat(path, "/tty"); + if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) { + fprintf(stderr, "xs_write for '%s' fail", string); + goto out; + } + ret = 0; + +out: + free(path); + xs_close(xs); + + return ret; +} + +void xenstore_store_pv_console_info(int i, Chardev *chr) +{ + if (i == 0) { + store_dev_info(xen_domid, chr, "/console"); + } else { + char buf[32]; + snprintf(buf, sizeof(buf), "/device/console/%d", i); + store_dev_info(xen_domid, chr, buf); + } +} + + +static void xenstore_record_dm_state(struct xs_handle *xs, const char *state) +{ + char path[50]; + + if (xs == NULL) { + error_report("xenstore connection not initialized"); + exit(1); + } + + snprintf(path, sizeof (path), "device-model/%u/state", xen_domid); + /* + * This call may fail when running restricted so don't make it fatal in + * that case. Toolstacks should instead use QMP to listen for state changes. + */ + if (!xs_write(xs, XBT_NULL, path, state, strlen(state)) && + !xen_domid_restrict) { + error_report("error recording dm state"); + exit(1); + } +} + + +static void xen_change_state_handler(void *opaque, int running, + RunState state) +{ + if (running) { + /* record state running */ + xenstore_record_dm_state(xenstore, "running"); + } +} + +static bool xen_get_igd_gfx_passthru(Object *obj, Error **errp) +{ + return has_igd_gfx_passthru; +} + +static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) +{ + has_igd_gfx_passthru = value; +} + +static void xen_setup_post(MachineState *ms, AccelState *accel) +{ + int rc; + + if (xen_domid_restrict) { + rc = xen_restrict(xen_domid); + if (rc < 0) { + perror("xen: failed to restrict"); + exit(1); + } + } +} + +static int xen_init(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + + xen_xc = xc_interface_open(0, 0, 0); + if (xen_xc == NULL) { + xen_pv_printf(NULL, 0, "can't open xen interface\n"); + return -1; + } + xen_fmem = xenforeignmemory_open(0, 0); + if (xen_fmem == NULL) { + xen_pv_printf(NULL, 0, "can't open xen fmem interface\n"); + xc_interface_close(xen_xc); + return -1; + } + xen_dmod = xendevicemodel_open(0, 0); + if (xen_dmod == NULL) { + xen_pv_printf(NULL, 0, "can't open xen devicemodel interface\n"); + xenforeignmemory_close(xen_fmem); + xc_interface_close(xen_xc); + return -1; + } + qemu_add_vm_change_state_handler(xen_change_state_handler, NULL); + /* + * opt out of system RAM being allocated by generic code + */ + mc->default_ram_id = NULL; + return 0; +} + +static void xen_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + static GlobalProperty compat[] = { + { "migration", "store-global-state", "off" }, + { "migration", "send-configuration", "off" }, + { "migration", "send-section-footer", "off" }, + }; + + ac->name = "Xen"; + ac->init_machine = xen_init; + ac->setup_post = xen_setup_post; + ac->allowed = &xen_allowed; + ac->compat_props = g_ptr_array_new(); + + compat_props_add(ac->compat_props, compat, G_N_ELEMENTS(compat)); + + object_class_property_add_bool(oc, "igd-passthru", + xen_get_igd_gfx_passthru, xen_set_igd_gfx_passthru); + object_class_property_set_description(oc, "igd-passthru", + "Set on/off to enable/disable igd passthrou"); +} + +#define TYPE_XEN_ACCEL ACCEL_CLASS_NAME("xen") + +static const TypeInfo xen_accel_type = { + .name = TYPE_XEN_ACCEL, + .parent = TYPE_ACCEL, + .class_init = xen_accel_class_init, +}; + +static void xen_type_init(void) +{ + type_register_static(&xen_accel_type); +} + +type_init(xen_type_init); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 85c199b30d..e27f57195a 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -30,6 +30,7 @@ #include "hw/acpi/acpi.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" +#include "sysemu/xen.h" #include "qapi/error.h" #include "qemu/range.h" #include "exec/address-spaces.h" @@ -41,7 +42,6 @@ #include "hw/mem/nvdimm.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" -#include "hw/xen/xen.h" #include "migration/vmstate.h" #include "hw/core/cpu.h" #include "trace.h" diff --git a/hw/i386/pc.c b/hw/i386/pc.c index b9961b1035..143ac1c354 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -57,6 +57,7 @@ #include "sysemu/tcg.h" #include "sysemu/numa.h" #include "sysemu/kvm.h" +#include "sysemu/xen.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 2613d25bda..eea964e72b 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ #include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "sysemu/xen.h" #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 688d57ddf3..fa9ef449d1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -36,6 +36,7 @@ #include "hw/rtc/mc146818rtc.h" #include "hw/xen/xen.h" #include "sysemu/kvm.h" +#include "sysemu/xen.h" #include "hw/kvm/clock.h" #include "hw/pci-host/q35.h" #include "hw/qdev-properties.h" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 82ece6b9e7..041303a2fa 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -28,6 +28,7 @@ #include "qemu/range.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" +#include "sysemu/xen.h" #include "sysemu/xen-mapcache.h" #include "trace.h" #include "exec/address-spaces.h" diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 0f7b05e5e1..a1492fdecd 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -33,6 +33,7 @@ #include "hw/xen/xen-legacy-backend.h" #include "trace.h" #include "exec/address-spaces.h" +#include "sysemu/xen.h" #include "sysemu/block-backend.h" #include "qemu/error-report.h" #include "qemu/module.h" diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index fd1c78879f..1a5267e19f 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -28,6 +28,7 @@ #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/xen/xen.h" +#include "sysemu/xen.h" #include "sysemu/sysemu.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 29187898f2..2c7ead7667 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -19,6 +19,7 @@ #include "hw/pci/msix.h" #include "hw/pci/pci.h" #include "hw/xen/xen.h" +#include "sysemu/xen.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "qemu/range.h" diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs index 84df60a928..340b2c5096 100644 --- a/hw/xen/Makefile.objs +++ b/hw/xen/Makefile.objs @@ -1,5 +1,5 @@ # xen backend driver support -common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o xen-common.o xen-bus.o xen-bus-helper.o xen-backend.o +common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o xen-bus.o xen-bus-helper.o xen-backend.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_graphics.o xen_pt_msi.o diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c deleted file mode 100644 index 70564cc952..0000000000 --- a/hw/xen/xen-common.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2014 Citrix Systems UK Ltd. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qapi/error.h" -#include "hw/xen/xen-legacy-backend.h" -#include "hw/xen/xen_pt.h" -#include "chardev/char.h" -#include "sysemu/accel.h" -#include "sysemu/runstate.h" -#include "migration/misc.h" -#include "migration/global_state.h" -#include "hw/boards.h" - -//#define DEBUG_XEN - -#ifdef DEBUG_XEN -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - -xc_interface *xen_xc; -xenforeignmemory_handle *xen_fmem; -xendevicemodel_handle *xen_dmod; - -static int store_dev_info(int domid, Chardev *cs, const char *string) -{ - struct xs_handle *xs = NULL; - char *path = NULL; - char *newpath = NULL; - char *pts = NULL; - int ret = -1; - - /* Only continue if we're talking to a pty. */ - if (!CHARDEV_IS_PTY(cs)) { - return 0; - } - pts = cs->filename + 4; - - /* We now have everything we need to set the xenstore entry. */ - xs = xs_open(0); - if (xs == NULL) { - fprintf(stderr, "Could not contact XenStore\n"); - goto out; - } - - path = xs_get_domain_path(xs, domid); - if (path == NULL) { - fprintf(stderr, "xs_get_domain_path() error\n"); - goto out; - } - newpath = realloc(path, (strlen(path) + strlen(string) + - strlen("/tty") + 1)); - if (newpath == NULL) { - fprintf(stderr, "realloc error\n"); - goto out; - } - path = newpath; - - strcat(path, string); - strcat(path, "/tty"); - if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) { - fprintf(stderr, "xs_write for '%s' fail", string); - goto out; - } - ret = 0; - -out: - free(path); - xs_close(xs); - - return ret; -} - -void xenstore_store_pv_console_info(int i, Chardev *chr) -{ - if (i == 0) { - store_dev_info(xen_domid, chr, "/console"); - } else { - char buf[32]; - snprintf(buf, sizeof(buf), "/device/console/%d", i); - store_dev_info(xen_domid, chr, buf); - } -} - - -static void xenstore_record_dm_state(struct xs_handle *xs, const char *state) -{ - char path[50]; - - if (xs == NULL) { - error_report("xenstore connection not initialized"); - exit(1); - } - - snprintf(path, sizeof (path), "device-model/%u/state", xen_domid); - /* - * This call may fail when running restricted so don't make it fatal in - * that case. Toolstacks should instead use QMP to listen for state changes. - */ - if (!xs_write(xs, XBT_NULL, path, state, strlen(state)) && - !xen_domid_restrict) { - error_report("error recording dm state"); - exit(1); - } -} - - -static void xen_change_state_handler(void *opaque, int running, - RunState state) -{ - if (running) { - /* record state running */ - xenstore_record_dm_state(xenstore, "running"); - } -} - -static bool xen_get_igd_gfx_passthru(Object *obj, Error **errp) -{ - return has_igd_gfx_passthru; -} - -static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) -{ - has_igd_gfx_passthru = value; -} - -static void xen_setup_post(MachineState *ms, AccelState *accel) -{ - int rc; - - if (xen_domid_restrict) { - rc = xen_restrict(xen_domid); - if (rc < 0) { - perror("xen: failed to restrict"); - exit(1); - } - } -} - -static int xen_init(MachineState *ms) -{ - MachineClass *mc = MACHINE_GET_CLASS(ms); - - xen_xc = xc_interface_open(0, 0, 0); - if (xen_xc == NULL) { - xen_pv_printf(NULL, 0, "can't open xen interface\n"); - return -1; - } - xen_fmem = xenforeignmemory_open(0, 0); - if (xen_fmem == NULL) { - xen_pv_printf(NULL, 0, "can't open xen fmem interface\n"); - xc_interface_close(xen_xc); - return -1; - } - xen_dmod = xendevicemodel_open(0, 0); - if (xen_dmod == NULL) { - xen_pv_printf(NULL, 0, "can't open xen devicemodel interface\n"); - xenforeignmemory_close(xen_fmem); - xc_interface_close(xen_xc); - return -1; - } - qemu_add_vm_change_state_handler(xen_change_state_handler, NULL); - /* - * opt out of system RAM being allocated by generic code - */ - mc->default_ram_id = NULL; - return 0; -} - -static void xen_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - static GlobalProperty compat[] = { - { "migration", "store-global-state", "off" }, - { "migration", "send-configuration", "off" }, - { "migration", "send-section-footer", "off" }, - }; - - ac->name = "Xen"; - ac->init_machine = xen_init; - ac->setup_post = xen_setup_post; - ac->allowed = &xen_allowed; - ac->compat_props = g_ptr_array_new(); - - compat_props_add(ac->compat_props, compat, G_N_ELEMENTS(compat)); - - object_class_property_add_bool(oc, "igd-passthru", - xen_get_igd_gfx_passthru, xen_set_igd_gfx_passthru); - object_class_property_set_description(oc, "igd-passthru", - "Set on/off to enable/disable igd passthrou"); -} - -#define TYPE_XEN_ACCEL ACCEL_CLASS_NAME("xen") - -static const TypeInfo xen_accel_type = { - .name = TYPE_XEN_ACCEL, - .parent = TYPE_ACCEL, - .class_init = xen_accel_class_init, -}; - -static void xen_type_init(void) -{ - type_register_static(&xen_accel_type); -} - -type_init(xen_type_init); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index b295f6a784..7b5c24e928 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -21,7 +21,7 @@ #ifndef CONFIG_USER_ONLY #include "cpu.h" -#include "hw/xen/xen.h" +#include "sysemu/xen.h" #include "sysemu/tcg.h" #include "exec/ramlist.h" #include "exec/ramblock.h" diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index 5ac1c6dc55..771dd447f2 100644 --- a/include/hw/xen/xen.h +++ b/include/hw/xen/xen.h @@ -20,13 +20,6 @@ extern uint32_t xen_domid; extern enum xen_mode xen_mode; extern bool xen_domid_restrict; -extern bool xen_allowed; - -static inline bool xen_enabled(void) -{ - return xen_allowed; -} - int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); void xen_piix3_set_irq(void *opaque, int irq_num, int level); void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); @@ -39,10 +32,6 @@ void xenstore_store_pv_console_info(int i, struct Chardev *chr); void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory); -void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, - struct MemoryRegion *mr, Error **errp); -void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); - void xen_register_framebuffer(struct MemoryRegion *mr); #endif /* QEMU_HW_XEN_H */ diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h new file mode 100644 index 0000000000..1ca292715e --- /dev/null +++ b/include/sysemu/xen.h @@ -0,0 +1,38 @@ +/* + * QEMU Xen support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_XEN_H +#define SYSEMU_XEN_H + +#ifdef CONFIG_XEN + +bool xen_enabled(void); + +#ifndef CONFIG_USER_ONLY +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + struct MemoryRegion *mr, Error **errp); +#endif + +#else /* !CONFIG_XEN */ + +#define xen_enabled() 0 +#ifndef CONFIG_USER_ONLY +static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ + /* nothing */ +} +static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + MemoryRegion *mr, Error **errp) +{ + g_assert_not_reached(); +} +#endif + +#endif /* CONFIG_XEN */ + +#endif diff --git a/migration/savevm.c b/migration/savevm.c index c00a6807d9..b979ea6e7f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "hw/boards.h" -#include "hw/xen/xen.h" #include "net/net.h" #include "migration.h" #include "migration/snapshot.h" @@ -59,6 +58,7 @@ #include "sysemu/replay.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" +#include "sysemu/xen.h" #include "qjson.h" #include "migration/colo.h" #include "qemu/bitmap.h" diff --git a/softmmu/vl.c b/softmmu/vl.c index 055472da66..f669c06ede 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -36,6 +36,7 @@ #include "sysemu/runstate.h" #include "sysemu/seccomp.h" #include "sysemu/tcg.h" +#include "sysemu/xen.h" #include "qemu/error-report.h" #include "qemu/sockets.h" @@ -178,7 +179,6 @@ static NotifierList exit_notifiers = static NotifierList machine_init_done_notifiers = NOTIFIER_LIST_INITIALIZER(machine_init_done_notifiers); -bool xen_allowed; uint32_t xen_domid; enum xen_mode xen_mode = XEN_EMULATE; bool xen_domid_restrict; diff --git a/stubs/xen-hvm.c b/stubs/xen-hvm.c index b7d53b5e2f..6954a5b696 100644 --- a/stubs/xen-hvm.c +++ b/stubs/xen-hvm.c @@ -35,11 +35,6 @@ int xen_is_pirq_msi(uint32_t msi_data) return 0; } -void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr, - Error **errp) -{ -} - qemu_irq *xen_interrupt_controller_init(void) { return NULL; @@ -49,10 +44,6 @@ void xen_register_framebuffer(MemoryRegion *mr) { } -void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) -{ -} - void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) { } diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 25a8f6b1ad..b5705cda86 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -29,6 +29,7 @@ #include "sysemu/reset.h" #include "sysemu/hvf.h" #include "sysemu/cpus.h" +#include "sysemu/xen.h" #include "kvm_i386.h" #include "sev_i386.h" @@ -54,7 +55,6 @@ #include "hw/i386/topology.h" #ifndef CONFIG_USER_ONLY #include "exec/address-spaces.h" -#include "hw/xen/xen.h" #include "hw/i386/apic_internal.h" #include "hw/boards.h" #endif -- cgit v1.2.3-55-g7522 From 38df19fad71abe8823f8b416f672be95c2ac8d04 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 18 May 2020 17:53:02 +0200 Subject: exec: Let address_space_read/write_cached() propagate MemTxResult Both address_space_read_cached_slow() and address_space_write_cached_slow() return a MemTxResult type. Do not discard it, return it to the caller. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini --- exec.c | 16 ++++++++-------- include/exec/memory.h | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/exec.c b/exec.c index be4be2df3a..0e197e53c2 100644 --- a/exec.c +++ b/exec.c @@ -3718,7 +3718,7 @@ static inline MemoryRegion *address_space_translate_cached( /* Called from RCU critical section. address_space_read_cached uses this * out of line function when the target is an MMIO or IOMMU region. */ -void +MemTxResult address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, void *buf, hwaddr len) { @@ -3728,15 +3728,15 @@ address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, l = len; mr = address_space_translate_cached(cache, addr, &addr1, &l, false, MEMTXATTRS_UNSPECIFIED); - flatview_read_continue(cache->fv, - addr, MEMTXATTRS_UNSPECIFIED, buf, len, - addr1, l, mr); + return flatview_read_continue(cache->fv, + addr, MEMTXATTRS_UNSPECIFIED, buf, len, + addr1, l, mr); } /* Called from RCU critical section. address_space_write_cached uses this * out of line function when the target is an MMIO or IOMMU region. */ -void +MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, const void *buf, hwaddr len) { @@ -3746,9 +3746,9 @@ address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, l = len; mr = address_space_translate_cached(cache, addr, &addr1, &l, true, MEMTXATTRS_UNSPECIFIED); - flatview_write_continue(cache->fv, - addr, MEMTXATTRS_UNSPECIFIED, buf, len, - addr1, l, mr); + return flatview_write_continue(cache->fv, + addr, MEMTXATTRS_UNSPECIFIED, buf, len, + addr1, l, mr); } #define ARG1_DECL MemoryRegionCache *cache diff --git a/include/exec/memory.h b/include/exec/memory.h index 3e00cdbbfa..48df5abe13 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2354,10 +2354,11 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr); /* Internal functions, part of the implementation of address_space_read_cached * and address_space_write_cached. */ -void address_space_read_cached_slow(MemoryRegionCache *cache, - hwaddr addr, void *buf, hwaddr len); -void address_space_write_cached_slow(MemoryRegionCache *cache, - hwaddr addr, const void *buf, hwaddr len); +MemTxResult address_space_read_cached_slow(MemoryRegionCache *cache, + hwaddr addr, void *buf, hwaddr len); +MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, + hwaddr addr, const void *buf, + hwaddr len); static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) { @@ -2422,15 +2423,16 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, * @buf: buffer with the data transferred * @len: length of the data transferred */ -static inline void +static inline MemTxResult address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, void *buf, hwaddr len) { assert(addr < cache->len && len <= cache->len - addr); if (likely(cache->ptr)) { memcpy(buf, cache->ptr + addr, len); + return MEMTX_OK; } else { - address_space_read_cached_slow(cache, addr, buf, len); + return address_space_read_cached_slow(cache, addr, buf, len); } } @@ -2442,15 +2444,16 @@ address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, * @buf: buffer with the data transferred * @len: length of the data transferred */ -static inline void +static inline MemTxResult address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, const void *buf, hwaddr len) { assert(addr < cache->len && len <= cache->len - addr); if (likely(cache->ptr)) { memcpy(cache->ptr + addr, buf, len); + return MEMTX_OK; } else { - address_space_write_cached_slow(cache, addr, buf, len); + return address_space_write_cached_slow(cache, addr, buf, len); } } -- cgit v1.2.3-55-g7522 From ddfc8b96eec648f35f0f054bd3f0a05df6cd34fb Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 18 May 2020 17:53:03 +0200 Subject: exec: Propagate cpu_memory_rw_debug() error Do not ignore the MemTxResult error type returned by the address_space_rw() API. Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- exec.c | 12 ++++++++---- include/exec/cpu-all.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/exec.c b/exec.c index 0e197e53c2..9cbde85d8c 100644 --- a/exec.c +++ b/exec.c @@ -3771,6 +3771,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, while (len > 0) { int asidx; MemTxAttrs attrs; + MemTxResult res; page = addr & TARGET_PAGE_MASK; phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); @@ -3783,11 +3784,14 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); if (is_write) { - address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, - attrs, buf, l); + res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + attrs, buf, l); } else { - address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, - l); + res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, + attrs, buf, l); + } + if (res != MEMTX_OK) { + return -1; } len -= l; buf += l; diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d14374bdd4..fb4e8a8e29 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -413,6 +413,7 @@ void dump_exec_info(void); void dump_opcount_info(void); #endif /* !CONFIG_USER_ONLY */ +/* Returns: 0 on success, -1 on error */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write); -- cgit v1.2.3-55-g7522 From 5579b524b0d2e4b310157c0b7985d35c24238120 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 18 May 2020 17:53:05 +0200 Subject: hw/elf_ops: Do not ignore write failures when loading ELF Do not ignore the MemTxResult error type returned by address_space_write(). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Bonzini --- include/hw/elf_ops.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 398a4a2c85..6fdff3dced 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -553,9 +553,14 @@ static int glue(load_elf, SZ)(const char *name, int fd, rom_add_elf_program(label, mapped_file, data, file_size, mem_size, addr, as); } else { - address_space_write(as ? as : &address_space_memory, - addr, MEMTXATTRS_UNSPECIFIED, - data, file_size); + MemTxResult res; + + res = address_space_write(as ? as : &address_space_memory, + addr, MEMTXATTRS_UNSPECIFIED, + data, file_size); + if (res != MEMTX_OK) { + goto fail; + } } } -- cgit v1.2.3-55-g7522 From c82d9d43ed5d1a7021890d788193fdbeae1011c8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 18 Mar 2020 10:52:03 -0400 Subject: KVM: Kick resamplefd for split kernel irqchip This is majorly only for X86 because that's the only one that supports split irqchip for now. When the irqchip is split, we face a dilemma that KVM irqfd will be enabled, however the slow irqchip is still running in the userspace. It means that the resamplefd in the kernel irqfds won't take any effect and it will miss to ack INTx interrupts on EOIs. One example is split irqchip with VFIO INTx, which will break if we use the VFIO INTx fast path. This patch can potentially supports the VFIO fast path again for INTx, that the IRQ delivery will still use the fast path, while we don't need to trap MMIOs in QEMU for the device to emulate the EIOs (see the callers of vfio_eoi() hook). However the EOI of the INTx will still need to be done from the userspace by caching all the resamplefds in QEMU and kick properly for IOAPIC EOI broadcast. This is tricky because in this case the userspace ioapic irr & remote-irr will be bypassed. However such a change will greatly boost performance for assigned devices using INTx irqs (TCP_RR boosts 46% after this patch applied). When the userspace is responsible for the resamplefd kickup, don't register it on the kvm_irqfd anymore, because on newer kernels (after commit 654f1f13ea56, 5.2+) the KVM_IRQFD will fail if with both split irqchip and resamplefd. This will make sure that the fast path will work for all supported kernels. https://patchwork.kernel.org/patch/10738541/#22609933 Suggested-by: Paolo Bonzini Signed-off-by: Peter Xu Message-Id: <20200318145204.74483-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++-- accel/kvm/trace-events | 1 + hw/intc/ioapic.c | 19 ++++++++++++ include/sysemu/kvm.h | 4 +++ 4 files changed, 101 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index b048c10af9..f24d7da783 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -160,9 +160,59 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = { static NotifierList kvm_irqchip_change_notifiers = NOTIFIER_LIST_INITIALIZER(kvm_irqchip_change_notifiers); +struct KVMResampleFd { + int gsi; + EventNotifier *resample_event; + QLIST_ENTRY(KVMResampleFd) node; +}; +typedef struct KVMResampleFd KVMResampleFd; + +/* + * Only used with split irqchip where we need to do the resample fd + * kick for the kernel from userspace. + */ +static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list = + QLIST_HEAD_INITIALIZER(kvm_resample_fd_list); + #define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock) #define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock) +static inline void kvm_resample_fd_remove(int gsi) +{ + KVMResampleFd *rfd; + + QLIST_FOREACH(rfd, &kvm_resample_fd_list, node) { + if (rfd->gsi == gsi) { + QLIST_REMOVE(rfd, node); + g_free(rfd); + break; + } + } +} + +static inline void kvm_resample_fd_insert(int gsi, EventNotifier *event) +{ + KVMResampleFd *rfd = g_new0(KVMResampleFd, 1); + + rfd->gsi = gsi; + rfd->resample_event = event; + + QLIST_INSERT_HEAD(&kvm_resample_fd_list, rfd, node); +} + +void kvm_resample_fd_notify(int gsi) +{ + KVMResampleFd *rfd; + + QLIST_FOREACH(rfd, &kvm_resample_fd_list, node) { + if (rfd->gsi == gsi) { + event_notifier_set(rfd->resample_event); + trace_kvm_resample_fd_notify(gsi); + return; + } + } +} + int kvm_get_max_memslots(void) { KVMState *s = KVM_STATE(current_accel()); @@ -1676,8 +1726,33 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, }; if (rfd != -1) { - irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE; - irqfd.resamplefd = rfd; + assert(assign); + if (kvm_irqchip_is_split()) { + /* + * When the slow irqchip (e.g. IOAPIC) is in the + * userspace, KVM kernel resamplefd will not work because + * the EOI of the interrupt will be delivered to userspace + * instead, so the KVM kernel resamplefd kick will be + * skipped. The userspace here mimics what the kernel + * provides with resamplefd, remember the resamplefd and + * kick it when we receive EOI of this IRQ. + * + * This is hackery because IOAPIC is mostly bypassed + * (except EOI broadcasts) when irqfd is used. However + * this can bring much performance back for split irqchip + * with INTx IRQs (for VFIO, this gives 93% perf of the + * full fast path, which is 46% perf boost comparing to + * the INTx slow path). + */ + kvm_resample_fd_insert(virq, resample); + } else { + irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE; + irqfd.resamplefd = rfd; + } + } else if (!assign) { + if (kvm_irqchip_is_split()) { + kvm_resample_fd_remove(virq); + } } if (!kvm_irqfds_enabled()) { diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 4fb6e59d19..a68eb66534 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -16,4 +16,5 @@ kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_ kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 +kvm_resample_fd_notify(int gsi) "gsi %d" diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index ffe30dc457..bca71b5934 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -241,6 +241,25 @@ void ioapic_eoi_broadcast(int vector) continue; } +#ifdef CONFIG_KVM + /* + * When IOAPIC is in the userspace while APIC is still in + * the kernel (i.e., split irqchip), we have a trick to + * kick the resamplefd logic for registered irqfds from + * userspace to deactivate the IRQ. When that happens, it + * means the irq bypassed userspace IOAPIC (so the irr and + * remote-irr of the table entry should be bypassed too + * even if interrupt come). Still kick the resamplefds if + * they're bound to the IRQ, to make sure to EOI the + * interrupt for the hardware correctly. + * + * Note: We still need to go through the irr & remote-irr + * operations below because we don't know whether there're + * emulated devices that are using/sharing the same IRQ. + */ + kvm_resample_fd_notify(n); +#endif + if (!(entry & IOAPIC_LVT_REMOTE_IRR)) { continue; } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 3b2250471c..b4174d941c 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -554,4 +554,8 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); struct ppc_radix_page_info *kvm_get_radix_page_info(void); int kvm_get_max_memslots(void); + +/* Notify resamplefd for EOI of specific interrupts. */ +void kvm_resample_fd_notify(int gsi); + #endif -- cgit v1.2.3-55-g7522 From 2261d3939fd40267029e790a9970f0e6c2ecfdea Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Fri, 29 May 2020 14:53:25 +0200 Subject: memory: Make 'info mtree' not display disabled regions by default We might have many disabled memory regions, making the 'info mtree' output too verbose to be useful. Remove the disabled regions in the default output, but allow the monitor user to display them using the '-D' option. Before: (qemu) info mtree memory-region: system 0000000000000000-ffffffffffffffff (prio 0, i/o): system 0000000000000000-0000000007ffffff (prio 0, ram): alias ram-below-4g @pc.ram 0000000000000000-0000000007ffffff 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom 00000000000e0000-00000000000fffff (prio 1, rom): alias isa-bios @pc.bios 0000000000020000-000000000003ffff 00000000fffc0000-00000000ffffffff (prio 0, rom): pc.bios 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, ram): alias pam-ram @pc.ram 00000000000c0000-00000000000c3fff [disabled] 00000000000c0000-00000000000c3fff (prio 1, ram): alias pam-pci @pc.ram 00000000000c0000-00000000000c3fff [disabled] 00000000000c0000-00000000000c3fff (prio 1, ram): alias pam-rom @pc.ram 00000000000c0000-00000000000c3fff [disabled] 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, ram): alias pam-ram @pc.ram 00000000000c4000-00000000000c7fff [disabled] 00000000000c4000-00000000000c7fff (prio 1, ram): alias pam-pci @pc.ram 00000000000c4000-00000000000c7fff [disabled] 00000000000c4000-00000000000c7fff (prio 1, ram): alias pam-rom @pc.ram 00000000000c4000-00000000000c7fff [disabled] 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff 00000000000c8000-00000000000cbfff (prio 1, ram): alias pam-ram @pc.ram 00000000000c8000-00000000000cbfff [disabled] 00000000000c8000-00000000000cbfff (prio 1, ram): alias pam-pci @pc.ram 00000000000c8000-00000000000cbfff [disabled] 00000000000c8000-00000000000cbfff (prio 1, ram): alias pam-rom @pc.ram 00000000000c8000-00000000000cbfff [disabled] 00000000000c8000-00000000000cbfff (prio 1, i/o): alias pam-pci @pci 00000000000c8000-00000000000cbfff 00000000000cc000-00000000000cffff (prio 1, ram): alias pam-ram @pc.ram 00000000000cc000-00000000000cffff [disabled] 00000000000cc000-00000000000cffff (prio 1, ram): alias pam-pci @pc.ram 00000000000cc000-00000000000cffff [disabled] 00000000000cc000-00000000000cffff (prio 1, ram): alias pam-rom @pc.ram 00000000000cc000-00000000000cffff [disabled] 00000000000cc000-00000000000cffff (prio 1, i/o): alias pam-pci @pci 00000000000cc000-00000000000cffff 00000000000d0000-00000000000d3fff (prio 1, ram): alias pam-ram @pc.ram 00000000000d0000-00000000000d3fff [disabled] 00000000000d0000-00000000000d3fff (prio 1, ram): alias pam-pci @pc.ram 00000000000d0000-00000000000d3fff [disabled] 00000000000d0000-00000000000d3fff (prio 1, ram): alias pam-rom @pc.ram 00000000000d0000-00000000000d3fff [disabled] 00000000000d0000-00000000000d3fff (prio 1, i/o): alias pam-pci @pci 00000000000d0000-00000000000d3fff 00000000000d4000-00000000000d7fff (prio 1, ram): alias pam-ram @pc.ram 00000000000d4000-00000000000d7fff [disabled] 00000000000d4000-00000000000d7fff (prio 1, ram): alias pam-pci @pc.ram 00000000000d4000-00000000000d7fff [disabled] 00000000000d4000-00000000000d7fff (prio 1, ram): alias pam-rom @pc.ram 00000000000d4000-00000000000d7fff [disabled] 00000000000d4000-00000000000d7fff (prio 1, i/o): alias pam-pci @pci 00000000000d4000-00000000000d7fff 00000000000d8000-00000000000dbfff (prio 1, ram): alias pam-ram @pc.ram 00000000000d8000-00000000000dbfff [disabled] 00000000000d8000-00000000000dbfff (prio 1, ram): alias pam-pci @pc.ram 00000000000d8000-00000000000dbfff [disabled] 00000000000d8000-00000000000dbfff (prio 1, ram): alias pam-rom @pc.ram 00000000000d8000-00000000000dbfff [disabled] 00000000000d8000-00000000000dbfff (prio 1, i/o): alias pam-pci @pci 00000000000d8000-00000000000dbfff 00000000000dc000-00000000000dffff (prio 1, ram): alias pam-ram @pc.ram 00000000000dc000-00000000000dffff [disabled] 00000000000dc000-00000000000dffff (prio 1, ram): alias pam-pci @pc.ram 00000000000dc000-00000000000dffff [disabled] 00000000000dc000-00000000000dffff (prio 1, ram): alias pam-rom @pc.ram 00000000000dc000-00000000000dffff [disabled] 00000000000dc000-00000000000dffff (prio 1, i/o): alias pam-pci @pci 00000000000dc000-00000000000dffff 00000000000e0000-00000000000e3fff (prio 1, ram): alias pam-ram @pc.ram 00000000000e0000-00000000000e3fff [disabled] 00000000000e0000-00000000000e3fff (prio 1, ram): alias pam-pci @pc.ram 00000000000e0000-00000000000e3fff [disabled] 00000000000e0000-00000000000e3fff (prio 1, ram): alias pam-rom @pc.ram 00000000000e0000-00000000000e3fff [disabled] 00000000000e0000-00000000000e3fff (prio 1, i/o): alias pam-pci @pci 00000000000e0000-00000000000e3fff 00000000000e4000-00000000000e7fff (prio 1, ram): alias pam-ram @pc.ram 00000000000e4000-00000000000e7fff [disabled] 00000000000e4000-00000000000e7fff (prio 1, ram): alias pam-pci @pc.ram 00000000000e4000-00000000000e7fff [disabled] 00000000000e4000-00000000000e7fff (prio 1, ram): alias pam-rom @pc.ram 00000000000e4000-00000000000e7fff [disabled] 00000000000e4000-00000000000e7fff (prio 1, i/o): alias pam-pci @pci 00000000000e4000-00000000000e7fff 00000000000e8000-00000000000ebfff (prio 1, ram): alias pam-ram @pc.ram 00000000000e8000-00000000000ebfff [disabled] 00000000000e8000-00000000000ebfff (prio 1, ram): alias pam-pci @pc.ram 00000000000e8000-00000000000ebfff [disabled] 00000000000e8000-00000000000ebfff (prio 1, ram): alias pam-rom @pc.ram 00000000000e8000-00000000000ebfff [disabled] 00000000000e8000-00000000000ebfff (prio 1, i/o): alias pam-pci @pci 00000000000e8000-00000000000ebfff 00000000000ec000-00000000000effff (prio 1, ram): alias pam-ram @pc.ram 00000000000ec000-00000000000effff [disabled] 00000000000ec000-00000000000effff (prio 1, ram): alias pam-pci @pc.ram 00000000000ec000-00000000000effff [disabled] 00000000000ec000-00000000000effff (prio 1, ram): alias pam-rom @pc.ram 00000000000ec000-00000000000effff [disabled] 00000000000ec000-00000000000effff (prio 1, i/o): alias pam-pci @pci 00000000000ec000-00000000000effff 00000000000f0000-00000000000fffff (prio 1, ram): alias pam-ram @pc.ram 00000000000f0000-00000000000fffff [disabled] 00000000000f0000-00000000000fffff (prio 1, ram): alias pam-pci @pc.ram 00000000000f0000-00000000000fffff [disabled] 00000000000f0000-00000000000fffff (prio 1, ram): alias pam-rom @pc.ram 00000000000f0000-00000000000fffff [disabled] 00000000000f0000-00000000000fffff (prio 1, i/o): alias pam-pci @pci 00000000000f0000-00000000000fffff 00000000fec00000-00000000fec00fff (prio 0, i/o): ioapic 00000000fed00000-00000000fed003ff (prio 0, i/o): hpet 00000000fee00000-00000000feefffff (prio 4096, i/o): apic-msi After: (qemu) info mtree memory-region: system 0000000000000000-ffffffffffffffff (prio 0, i/o): system 0000000000000000-0000000007ffffff (prio 0, ram): alias ram-below-4g @pc.ram 0000000000000000-0000000007ffffff 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom 00000000000e0000-00000000000fffff (prio 1, rom): alias isa-bios @pc.bios 0000000000020000-000000000003ffff 00000000fffc0000-00000000ffffffff (prio 0, rom): pc.bios 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff 00000000000c8000-00000000000cbfff (prio 1, i/o): alias pam-pci @pci 00000000000c8000-00000000000cbfff 00000000000cc000-00000000000cffff (prio 1, i/o): alias pam-pci @pci 00000000000cc000-00000000000cffff 00000000000d0000-00000000000d3fff (prio 1, i/o): alias pam-pci @pci 00000000000d0000-00000000000d3fff 00000000000d4000-00000000000d7fff (prio 1, i/o): alias pam-pci @pci 00000000000d4000-00000000000d7fff 00000000000d8000-00000000000dbfff (prio 1, i/o): alias pam-pci @pci 00000000000d8000-00000000000dbfff 00000000000dc000-00000000000dffff (prio 1, i/o): alias pam-pci @pci 00000000000dc000-00000000000dffff 00000000000e0000-00000000000e3fff (prio 1, i/o): alias pam-pci @pci 00000000000e0000-00000000000e3fff 00000000000e4000-00000000000e7fff (prio 1, i/o): alias pam-pci @pci 00000000000e4000-00000000000e7fff 00000000000e8000-00000000000ebfff (prio 1, i/o): alias pam-pci @pci 00000000000e8000-00000000000ebfff 00000000000ec000-00000000000effff (prio 1, i/o): alias pam-pci @pci 00000000000ec000-00000000000effff 00000000000f0000-00000000000fffff (prio 1, i/o): alias pam-pci @pci 00000000000f0000-00000000000fffff 00000000fec00000-00000000fec00fff (prio 0, i/o): ioapic 00000000fed00000-00000000fed003ff (prio 0, i/o): hpet 00000000fee00000-00000000feefffff (prio 4096, i/o): apic-msi The old behavior is preserved using 'info mtree -D'. Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hmp-commands-info.hx | 7 ++--- include/exec/memory.h | 2 +- memory.c | 75 ++++++++++++++++++++++++++++----------------------- monitor/misc.c | 3 ++- 4 files changed, 48 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index ca5198438d..30209e3903 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -254,11 +254,12 @@ ERST { .name = "mtree", - .args_type = "flatview:-f,dispatch_tree:-d,owner:-o", - .params = "[-f][-d][-o]", + .args_type = "flatview:-f,dispatch_tree:-d,owner:-o,disabled:-D", + .params = "[-f][-d][-o][-D]", .help = "show memory tree (-f: dump flat view for address spaces;" "-d: dump dispatch tree, valid with -f only);" - "-o: dump region owners/parents", + "-o: dump region owners/parents;" + "-D: dump disabled regions", .cmd = hmp_info_mtree, }, diff --git a/include/exec/memory.h b/include/exec/memory.h index 48df5abe13..bd7fdd6081 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1984,7 +1984,7 @@ void memory_global_dirty_log_start(void); */ void memory_global_dirty_log_stop(void); -void mtree_info(bool flatview, bool dispatch_tree, bool owner); +void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled); /** * memory_region_dispatch_read: perform a read directly to the specified diff --git a/memory.c b/memory.c index 91ceaf9fcf..2f15a4b250 100644 --- a/memory.c +++ b/memory.c @@ -2882,7 +2882,7 @@ static void mtree_print_mr_owner(const MemoryRegion *mr) static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, hwaddr base, MemoryRegionListHead *alias_print_queue, - bool owner) + bool owner, bool display_disabled) { MemoryRegionList *new_ml, *ml, *next_ml; MemoryRegionListHead submr_print_queue; @@ -2894,10 +2894,6 @@ static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, return; } - for (i = 0; i < level; i++) { - qemu_printf(MTREE_INDENT); - } - cur_start = base + mr->addr; cur_end = cur_start + MR_SIZE(mr->size); @@ -2926,35 +2922,46 @@ static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, ml->mr = mr->alias; QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue); } - qemu_printf(TARGET_FMT_plx "-" TARGET_FMT_plx - " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx - "-" TARGET_FMT_plx "%s", - cur_start, cur_end, - mr->priority, - mr->nonvolatile ? "nv-" : "", - memory_region_type((MemoryRegion *)mr), - memory_region_name(mr), - memory_region_name(mr->alias), - mr->alias_offset, - mr->alias_offset + MR_SIZE(mr->size), - mr->enabled ? "" : " [disabled]"); - if (owner) { - mtree_print_mr_owner(mr); + if (mr->enabled || display_disabled) { + for (i = 0; i < level; i++) { + qemu_printf(MTREE_INDENT); + } + qemu_printf(TARGET_FMT_plx "-" TARGET_FMT_plx + " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx + "-" TARGET_FMT_plx "%s", + cur_start, cur_end, + mr->priority, + mr->nonvolatile ? "nv-" : "", + memory_region_type((MemoryRegion *)mr), + memory_region_name(mr), + memory_region_name(mr->alias), + mr->alias_offset, + mr->alias_offset + MR_SIZE(mr->size), + mr->enabled ? "" : " [disabled]"); + if (owner) { + mtree_print_mr_owner(mr); + } + qemu_printf("\n"); } } else { - qemu_printf(TARGET_FMT_plx "-" TARGET_FMT_plx - " (prio %d, %s%s): %s%s", - cur_start, cur_end, - mr->priority, - mr->nonvolatile ? "nv-" : "", - memory_region_type((MemoryRegion *)mr), - memory_region_name(mr), - mr->enabled ? "" : " [disabled]"); - if (owner) { - mtree_print_mr_owner(mr); + if (mr->enabled || display_disabled) { + for (i = 0; i < level; i++) { + qemu_printf(MTREE_INDENT); + } + qemu_printf(TARGET_FMT_plx "-" TARGET_FMT_plx + " (prio %d, %s%s): %s%s", + cur_start, cur_end, + mr->priority, + mr->nonvolatile ? "nv-" : "", + memory_region_type((MemoryRegion *)mr), + memory_region_name(mr), + mr->enabled ? "" : " [disabled]"); + if (owner) { + mtree_print_mr_owner(mr); + } + qemu_printf("\n"); } } - qemu_printf("\n"); QTAILQ_INIT(&submr_print_queue); @@ -2977,7 +2984,7 @@ static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) { mtree_print_mr(ml->mr, level + 1, cur_start, - alias_print_queue, owner); + alias_print_queue, owner, display_disabled); } QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, mrqueue, next_ml) { @@ -3088,7 +3095,7 @@ static gboolean mtree_info_flatview_free(gpointer key, gpointer value, return true; } -void mtree_info(bool flatview, bool dispatch_tree, bool owner) +void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled) { MemoryRegionListHead ml_head; MemoryRegionList *ml, *ml2; @@ -3136,14 +3143,14 @@ void mtree_info(bool flatview, bool dispatch_tree, bool owner) QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { qemu_printf("address-space: %s\n", as->name); - mtree_print_mr(as->root, 1, 0, &ml_head, owner); + mtree_print_mr(as->root, 1, 0, &ml_head, owner, disabled); qemu_printf("\n"); } /* print aliased regions */ QTAILQ_FOREACH(ml, &ml_head, mrqueue) { qemu_printf("memory-region: %s\n", memory_region_name(ml->mr)); - mtree_print_mr(ml->mr, 1, 0, &ml_head, owner); + mtree_print_mr(ml->mr, 1, 0, &ml_head, owner, disabled); qemu_printf("\n"); } diff --git a/monitor/misc.c b/monitor/misc.c index f5207cd242..89bb970b00 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -957,8 +957,9 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict) bool flatview = qdict_get_try_bool(qdict, "flatview", false); bool dispatch_tree = qdict_get_try_bool(qdict, "dispatch_tree", false); bool owner = qdict_get_try_bool(qdict, "owner", false); + bool disabled = qdict_get_try_bool(qdict, "disabled", false); - mtree_info(flatview, dispatch_tree, owner); + mtree_info(flatview, dispatch_tree, owner, disabled); } #ifdef CONFIG_PROFILER -- cgit v1.2.3-55-g7522 From c08790f48b2f41ef26d491fd4d460b74c06cefe4 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 1 Jun 2020 11:38:08 +0200 Subject: qemu/thread: Mark qemu_thread_exit() with 'noreturn' attribute After upgrading to Ubuntu 20.04 LTS, GCC 9.3 complains: util/qemu-thread-posix.c: In function ‘qemu_thread_exit’: util/qemu-thread-posix.c:577:6: error: function might be candidate for attribute ‘noreturn’ [-Werror=suggest-attribute=noreturn] 577 | void qemu_thread_exit(void *retval) | ^~~~~~~~~~~~~~~~ Fix by marking the qemu_thread_exit function with QEMU_NORETURN to set the 'noreturn' attribute. Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- include/qemu/thread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/thread.h b/include/qemu/thread.h index d22848138e..06c058fb58 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -177,7 +177,7 @@ void qemu_thread_create(QemuThread *thread, const char *name, void *qemu_thread_join(QemuThread *thread); void qemu_thread_get_self(QemuThread *thread); bool qemu_thread_is_self(QemuThread *thread); -void qemu_thread_exit(void *retval); +void qemu_thread_exit(void *retval) QEMU_NORETURN; void qemu_thread_naming(bool enable); struct Notifier; -- cgit v1.2.3-55-g7522 From 77f55eac6c433e23e82a1b88b2d74f385c4c7d82 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Tue, 26 May 2020 16:47:43 +0530 Subject: exec: set map length to zero when returning NULL When mapping physical memory into host's virtual address space, 'address_space_map' may return NULL if BounceBuffer is in_use. Set and return '*plen = 0' to avoid later NULL pointer dereference. Reported-by: Alexander Bulekov Fixes: https://bugs.launchpad.net/qemu/+bug/1878259 Suggested-by: Paolo Bonzini Suggested-by: Peter Maydell Signed-off-by: Prasad J Pandit Message-Id: <20200526111743.428367-1-ppandit@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- exec.c | 1 + include/exec/memory.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/exec.c b/exec.c index 9cbde85d8c..778263f1c6 100644 --- a/exec.c +++ b/exec.c @@ -3540,6 +3540,7 @@ void *address_space_map(AddressSpace *as, if (!memory_access_is_direct(mr, is_write)) { if (atomic_xchg(&bounce.in_use, true)) { + *plen = 0; return NULL; } /* Avoid unbounded allocations */ diff --git a/include/exec/memory.h b/include/exec/memory.h index bd7fdd6081..af8ca7824e 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2314,7 +2314,8 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, hwaddr len, /* address_space_map: map a physical memory region into a host virtual address * * May map a subset of the requested range, given by and returned in @plen. - * May return %NULL if resources needed to perform the mapping are exhausted. + * May return %NULL and set *@plen to zero(0), if resources needed to perform + * the mapping are exhausted. * Use only for reads OR writes - not for read-modify-write operations. * Use cpu_register_map_client() to know when retrying the map operation is * likely to succeed. -- cgit v1.2.3-55-g7522 From 33fb9bfaa4bef35b53affaf17368b439834de5f9 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Tue, 26 May 2020 19:24:21 +0200 Subject: sysemu/accel: Restrict machine methods to system-mode Restrict init_machine(), setup_post() and has_memory() to system-mode. Reviewed-by: Edgar E. Iglesias Reviewed-by: Cornelia Huck Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Roman Bolshakov Message-Id: <20200526172427.17460-2-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/sysemu/accel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h index 47e5788530..e08b8ab8fa 100644 --- a/include/sysemu/accel.h +++ b/include/sysemu/accel.h @@ -37,10 +37,12 @@ typedef struct AccelClass { /*< public >*/ const char *name; +#ifndef CONFIG_USER_ONLY int (*init_machine)(MachineState *ms); void (*setup_post)(MachineState *ms, AccelState *accel); bool (*has_memory)(MachineState *ms, AddressSpace *as, hwaddr start_addr, hwaddr size); +#endif bool *allowed; /* * Array of global properties that would be applied when specific -- cgit v1.2.3-55-g7522 From ce4049e89344cc019fff75ab0ecc645af27400ca Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Tue, 26 May 2020 19:24:22 +0200 Subject: sysemu/tcg: Only declare tcg_allowed when TCG is available When TCG is not available, the tcg_allowed variable does not exist. Reviewed-by: Edgar E. Iglesias Reviewed-by: Cornelia Huck Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200526172427.17460-3-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/sysemu/tcg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h index 7d116d2e80..d9d3ca8559 100644 --- a/include/sysemu/tcg.h +++ b/include/sysemu/tcg.h @@ -8,9 +8,9 @@ #ifndef SYSEMU_TCG_H #define SYSEMU_TCG_H -extern bool tcg_allowed; void tcg_exec_init(unsigned long tb_size); #ifdef CONFIG_TCG +extern bool tcg_allowed; #define tcg_enabled() (tcg_allowed) #else #define tcg_enabled() 0 -- cgit v1.2.3-55-g7522 From f291cf54148e5b9e51c55b9056e4be546492a9ca Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Tue, 26 May 2020 19:24:23 +0200 Subject: sysemu/hvf: Only declare hvf_allowed when HVF is available When HVF is not available, the hvf_allowed variable does not exist. Reviewed-by: Edgar E. Iglesias Reviewed-by: Cornelia Huck Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Roman Bolshakov Message-Id: <20200526172427.17460-4-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/sysemu/hvf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index d211e808e9..fe95743124 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -18,7 +18,6 @@ #include "exec/memory.h" #include "sysemu/accel.h" -extern bool hvf_allowed; #ifdef CONFIG_HVF #include #include @@ -26,11 +25,12 @@ extern bool hvf_allowed; #include "target/i386/cpu.h" uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); +extern bool hvf_allowed; #define hvf_enabled() (hvf_allowed) -#else +#else /* !CONFIG_HVF */ #define hvf_enabled() 0 #define hvf_get_supported_cpuid(func, idx, reg) 0 -#endif +#endif /* !CONFIG_HVF */ /* hvf_slot flags */ #define HVF_SLOT_LOG (1 << 0) -- cgit v1.2.3-55-g7522 From 24115348bd8a8a1206646b81990e451ee841c8e5 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 28 May 2020 22:37:46 +0300 Subject: i386: hvf: Move HVFState definition into hvf "sysemu/hvf.h" is intended for inclusion in generic code. However it also contains several hvf definitions and declarations, including HVFState that are used only inside "hvf.c". "hvf-i386.h" would be more appropriate place to define HVFState as it's only included by "hvf.c" and "x86_task.c". Signed-off-by: Roman Bolshakov Message-Id: <20200528193758.51454-2-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- include/sysemu/hvf.h | 37 ------------------------------------- target/i386/hvf/hvf-i386.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index fe95743124..644bdfc722 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -15,8 +15,6 @@ #include "cpu.h" #include "qemu/bitops.h" -#include "exec/memory.h" -#include "sysemu/accel.h" #ifdef CONFIG_HVF #include @@ -32,41 +30,6 @@ extern bool hvf_allowed; #define hvf_get_supported_cpuid(func, idx, reg) 0 #endif /* !CONFIG_HVF */ -/* hvf_slot flags */ -#define HVF_SLOT_LOG (1 << 0) - -typedef struct hvf_slot { - uint64_t start; - uint64_t size; - uint8_t *mem; - int slot_id; - uint32_t flags; - MemoryRegion *region; -} hvf_slot; - -typedef struct hvf_vcpu_caps { - uint64_t vmx_cap_pinbased; - uint64_t vmx_cap_procbased; - uint64_t vmx_cap_procbased2; - uint64_t vmx_cap_entry; - uint64_t vmx_cap_exit; - uint64_t vmx_cap_preemption_timer; -} hvf_vcpu_caps; - -typedef struct HVFState { - AccelState parent; - hvf_slot slots[32]; - int num_slots; - - hvf_vcpu_caps *hvf_caps; -} HVFState; -extern HVFState *hvf_state; - -void hvf_set_phys_mem(MemoryRegionSection *, bool); -void hvf_handle_io(CPUArchState *, uint16_t, void *, - int, int, int); -hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); - /* Disable HVF if |disable| is 1, otherwise, enable it iff it is supported by * the host CPU. Use hvf_enabled() after this to get the result. */ void hvf_disable(int disable); diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index fbe4a350c5..ef20c73eca 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -16,6 +16,7 @@ #ifndef HVF_I386_H #define HVF_I386_H +#include "sysemu/accel.h" #include "sysemu/hvf.h" #include "cpu.h" #include "x86.h" @@ -35,6 +36,40 @@ struct hvf_state { uint64_t mem_quota; }; +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +typedef struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; +} HVFState; +extern HVFState *hvf_state; + +void hvf_set_phys_mem(MemoryRegionSection *, bool); +void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); + #ifdef NEED_CPU_H /* Functions exported to host specific mode */ -- cgit v1.2.3-55-g7522 From 583ae161b1d7ba8832260d1cca022afce7dcf957 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 28 May 2020 22:37:47 +0300 Subject: i386: hvf: Drop useless declarations in sysemu They're either declared elsewhere or have no use. While at it, rename _hvf_cpu_synchronize_post_init() to do_hvf_cpu_synchronize_post_init(). Signed-off-by: Roman Bolshakov Message-Id: <20200528193758.51454-3-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé --- include/sysemu/hvf.h | 22 ---------------------- target/i386/hvf/hvf.c | 7 ++++--- 2 files changed, 4 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index 644bdfc722..2af32e505e 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -30,35 +30,13 @@ extern bool hvf_allowed; #define hvf_get_supported_cpuid(func, idx, reg) 0 #endif /* !CONFIG_HVF */ -/* Disable HVF if |disable| is 1, otherwise, enable it iff it is supported by - * the host CPU. Use hvf_enabled() after this to get the result. */ -void hvf_disable(int disable); - -/* Returns non-0 if the host CPU supports the VMX "unrestricted guest" feature - * which allows the virtual CPU to directly run in "real mode". If true, this - * allows QEMU to run several vCPU threads in parallel (see cpus.c). Otherwise, - * only a a single TCG thread can run, and it will call HVF to run the current - * instructions, except in case of "real mode" (paging disabled, typically at - * boot time), or MMIO operations. */ - -int hvf_sync_vcpus(void); - int hvf_init_vcpu(CPUState *); int hvf_vcpu_exec(CPUState *); -int hvf_smp_cpu_exec(CPUState *); void hvf_cpu_synchronize_state(CPUState *); void hvf_cpu_synchronize_post_reset(CPUState *); void hvf_cpu_synchronize_post_init(CPUState *); -void _hvf_cpu_synchronize_post_init(CPUState *, run_on_cpu_data); - void hvf_vcpu_destroy(CPUState *); -void hvf_raise_event(CPUState *); -/* void hvf_reset_vcpu_state(void *opaque); */ void hvf_reset_vcpu(CPUState *); -void vmx_update_tpr(CPUState *); -void update_apic_tpr(CPUState *); -int hvf_put_registers(CPUState *); -void vmx_clear_int_window_exiting(CPUState *cpu); #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index d72543dc31..9ccdb7e7c7 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -251,7 +251,7 @@ void vmx_update_tpr(CPUState *cpu) } } -void update_apic_tpr(CPUState *cpu) +static void update_apic_tpr(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4; @@ -312,7 +312,8 @@ void hvf_cpu_synchronize_post_reset(CPUState *cpu_state) run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); } -void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) +static void do_hvf_cpu_synchronize_post_init(CPUState *cpu, + run_on_cpu_data arg) { CPUState *cpu_state = cpu; hvf_put_registers(cpu_state); @@ -321,7 +322,7 @@ void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) void hvf_cpu_synchronize_post_init(CPUState *cpu_state) { - run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); + run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) -- cgit v1.2.3-55-g7522 From 8598135dd6df2b2ad2ce10f68d9eae3e9e26da6d Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 28 May 2020 22:37:48 +0300 Subject: i386: hvf: Clean stray includes in sysemu They have no use. Signed-off-by: Roman Bolshakov Reviewed-by: Claudio Fontana Message-Id: <20200528193758.51454-4-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- include/sysemu/hvf.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index 2af32e505e..5214ed5202 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -13,14 +13,7 @@ #ifndef HVF_H #define HVF_H -#include "cpu.h" -#include "qemu/bitops.h" - #ifdef CONFIG_HVF -#include -#include -#include -#include "target/i386/cpu.h" uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); extern bool hvf_allowed; -- cgit v1.2.3-55-g7522 From e77cb0bb204c18c04a8290e03181510bbbfc683a Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 28 May 2020 22:37:58 +0300 Subject: i386: hvf: Drop HVFX86EmulatorState Signed-off-by: Roman Bolshakov Message-Id: <20200528193758.51454-14-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- include/qemu/typedefs.h | 1 - target/i386/cpu.h | 1 - target/i386/hvf/hvf.c | 1 - target/i386/hvf/x86.h | 4 ---- 4 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index de68d51d52..ce4a78b687 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -51,7 +51,6 @@ typedef struct FWCfgIoState FWCfgIoState; typedef struct FWCfgMemState FWCfgMemState; typedef struct FWCfgState FWCfgState; typedef struct HostMemoryBackend HostMemoryBackend; -typedef struct HVFX86EmulatorState HVFX86EmulatorState; typedef struct I2CBus I2CBus; typedef struct I2SCodec I2SCodec; typedef struct IOMMUMemoryRegion IOMMUMemoryRegion; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 25a2f4c0c3..7d77efd9e4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1604,7 +1604,6 @@ typedef struct CPUX86State { #if defined(CONFIG_HVF) HVFX86LazyFlags hvf_lflags; void *hvf_mmio_buf; - HVFX86EmulatorState *hvf_emul; #endif uint64_t mcg_cap; diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 57696c46c7..be016b951a 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -568,7 +568,6 @@ int hvf_init_vcpu(CPUState *cpu) hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); env->hvf_mmio_buf = g_new(char, 4096); - env->hvf_emul = g_new0(HVFX86EmulatorState, 1); r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); cpu->vcpu_dirty = 1; diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h index 483fcea762..bacade7b65 100644 --- a/target/i386/hvf/x86.h +++ b/target/i386/hvf/x86.h @@ -228,10 +228,6 @@ typedef struct x68_segment_selector { }; } __attribute__ ((__packed__)) x68_segment_selector; -/* Definition of hvf_x86_state is here */ -struct HVFX86EmulatorState { -}; - /* useful register access macros */ #define x86_reg(cpu, reg) ((x86_register *) &cpu->regs[reg]) -- cgit v1.2.3-55-g7522 From e1bc61989264a37aeffefa1fb3cf100db259b35b Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 1 Jun 2020 16:15:34 +0200 Subject: exec/memory: Remove unused MemoryRegionMmio type Since commit 62a0db942dec ('memory: Remove old_mmio accessors') this structure is unused. Remove it. Suggested-by: Peter Maydell Reviewed-by: Peter Maydell Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20200601141536.15192-2-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/exec/memory.h b/include/exec/memory.h index af8ca7824e..7207025bd4 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -50,12 +50,6 @@ extern bool global_dirty_log; typedef struct MemoryRegionOps MemoryRegionOps; -typedef struct MemoryRegionMmio MemoryRegionMmio; - -struct MemoryRegionMmio { - CPUReadMemoryFunc *read[3]; - CPUWriteMemoryFunc *write[3]; -}; typedef struct IOMMUTLBEntry IOMMUTLBEntry; -- cgit v1.2.3-55-g7522 From 2c89d91195c4c7a118d2ae3518c31ca77a0583cd Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 1 Jun 2020 16:15:35 +0200 Subject: hw/usb: Move device-specific declarations to new 'hcd-musb.h' header Move the declarations for the MUSB-HDRC USB2.0 OTG compliant core into a separate header. Reviewed-by: Peter Maydell Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20200601141536.15192-3-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- hw/usb/hcd-musb.c | 1 + hw/usb/tusb6010.c | 1 + include/hw/usb.h | 30 ------------------------------ include/hw/usb/hcd-musb.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 30 deletions(-) create mode 100644 include/hw/usb/hcd-musb.h (limited to 'include') diff --git a/hw/usb/hcd-musb.c b/hw/usb/hcd-musb.c index c29fbef6fc..5ab13feb3a 100644 --- a/hw/usb/hcd-musb.c +++ b/hw/usb/hcd-musb.c @@ -23,6 +23,7 @@ #include "qemu/osdep.h" #include "qemu/timer.h" #include "hw/usb.h" +#include "hw/usb/hcd-musb.h" #include "hw/irq.h" #include "hw/hw.h" diff --git a/hw/usb/tusb6010.c b/hw/usb/tusb6010.c index 17580876c6..27eb28d3e4 100644 --- a/hw/usb/tusb6010.c +++ b/hw/usb/tusb6010.c @@ -23,6 +23,7 @@ #include "qemu/module.h" #include "qemu/timer.h" #include "hw/usb.h" +#include "hw/usb/hcd-musb.h" #include "hw/arm/omap.h" #include "hw/hw.h" #include "hw/irq.h" diff --git a/include/hw/usb.h b/include/hw/usb.h index 1cf1cd9584..e2128c7c45 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -474,36 +474,6 @@ bool usb_host_dev_is_scsi_storage(USBDevice *usbdev); #define VM_USB_HUB_SIZE 8 -/* hw/usb/hdc-musb.c */ - -enum musb_irq_source_e { - musb_irq_suspend = 0, - musb_irq_resume, - musb_irq_rst_babble, - musb_irq_sof, - musb_irq_connect, - musb_irq_disconnect, - musb_irq_vbus_request, - musb_irq_vbus_error, - musb_irq_rx, - musb_irq_tx, - musb_set_vbus, - musb_set_session, - /* Add new interrupts here */ - musb_irq_max, /* total number of interrupts defined */ -}; - -typedef struct MUSBState MUSBState; - -extern CPUReadMemoryFunc * const musb_read[]; -extern CPUWriteMemoryFunc * const musb_write[]; - -MUSBState *musb_init(DeviceState *parent_device, int gpio_base); -void musb_reset(MUSBState *s); -uint32_t musb_core_intr_get(MUSBState *s); -void musb_core_intr_clear(MUSBState *s, uint32_t mask); -void musb_set_size(MUSBState *s, int epnum, int size, int is_tx); - /* usb-bus.c */ #define TYPE_USB_BUS "usb-bus" diff --git a/include/hw/usb/hcd-musb.h b/include/hw/usb/hcd-musb.h new file mode 100644 index 0000000000..26b50132ff --- /dev/null +++ b/include/hw/usb/hcd-musb.h @@ -0,0 +1,46 @@ +/* + * "Inventra" High-speed Dual-Role Controller (MUSB-HDRC), Mentor Graphics, + * USB2.0 OTG compliant core used in various chips. + * + * Only host-mode and non-DMA accesses are currently supported. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_USB_MUSB_H +#define HW_USB_MUSB_H + +#include "exec/cpu-common.h" + +enum musb_irq_source_e { + musb_irq_suspend = 0, + musb_irq_resume, + musb_irq_rst_babble, + musb_irq_sof, + musb_irq_connect, + musb_irq_disconnect, + musb_irq_vbus_request, + musb_irq_vbus_error, + musb_irq_rx, + musb_irq_tx, + musb_set_vbus, + musb_set_session, + /* Add new interrupts here */ + musb_irq_max /* total number of interrupts defined */ +}; + +extern CPUReadMemoryFunc * const musb_read[]; +extern CPUWriteMemoryFunc * const musb_write[]; + +typedef struct MUSBState MUSBState; + +MUSBState *musb_init(DeviceState *parent_device, int gpio_base); +void musb_reset(MUSBState *s); +uint32_t musb_core_intr_get(MUSBState *s); +void musb_core_intr_clear(MUSBState *s, uint32_t mask); +void musb_set_size(MUSBState *s, int epnum, int size, int is_tx); + +#endif -- cgit v1.2.3-55-g7522 From efb22b2f98975785aa594e5a198d4c2e13a5d2d0 Mon Sep 17 00:00:00 2001 From: Philippe Mathieu-Daudé Date: Mon, 1 Jun 2020 16:15:36 +0200 Subject: exec/cpu-common: Move MUSB specific typedefs to 'hw/usb/hcd-musb.h' The CPUReadMemoryFunc/CPUWriteMemoryFunc typedefs are legacy remnant from before the conversion to MemoryRegions. Since they are now only used in tusb6010.c and hcd-musb.c, move them to "hw/usb/musb.h" and rename them appropriately. Suggested-by: Peter Maydell Reviewed-by: Peter Maydell Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20200601141536.15192-4-f4bug@amsat.org> Signed-off-by: Paolo Bonzini --- hw/usb/hcd-musb.c | 4 ++-- include/exec/cpu-common.h | 3 --- include/hw/usb/hcd-musb.h | 9 +++++---- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/hw/usb/hcd-musb.c b/hw/usb/hcd-musb.c index 5ab13feb3a..85f5ff5bd4 100644 --- a/hw/usb/hcd-musb.c +++ b/hw/usb/hcd-musb.c @@ -1540,13 +1540,13 @@ static void musb_writew(void *opaque, hwaddr addr, uint32_t value) }; } -CPUReadMemoryFunc * const musb_read[] = { +MUSBReadFunc * const musb_read[] = { musb_readb, musb_readh, musb_readw, }; -CPUWriteMemoryFunc * const musb_write[] = { +MUSBWriteFunc * const musb_write[] = { musb_writeb, musb_writeh, musb_writew, diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index b47e5630e7..d5e285d2b5 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -43,9 +43,6 @@ extern ram_addr_t ram_size; /* memory API */ -typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); -typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); - void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(void *ptr); diff --git a/include/hw/usb/hcd-musb.h b/include/hw/usb/hcd-musb.h index 26b50132ff..c874b9f292 100644 --- a/include/hw/usb/hcd-musb.h +++ b/include/hw/usb/hcd-musb.h @@ -13,8 +13,6 @@ #ifndef HW_USB_MUSB_H #define HW_USB_MUSB_H -#include "exec/cpu-common.h" - enum musb_irq_source_e { musb_irq_suspend = 0, musb_irq_resume, @@ -32,8 +30,11 @@ enum musb_irq_source_e { musb_irq_max /* total number of interrupts defined */ }; -extern CPUReadMemoryFunc * const musb_read[]; -extern CPUWriteMemoryFunc * const musb_write[]; +/* TODO convert hcd-musb to QOM/qdev and remove MUSBReadFunc/MUSBWriteFunc */ +typedef void MUSBWriteFunc(void *opaque, hwaddr addr, uint32_t value); +typedef uint32_t MUSBReadFunc(void *opaque, hwaddr addr); +extern MUSBReadFunc * const musb_read[]; +extern MUSBWriteFunc * const musb_write[]; typedef struct MUSBState MUSBState; -- cgit v1.2.3-55-g7522