diff options
author | Michael Brown | 2014-12-11 18:23:45 +0100 |
---|---|---|
committer | Michael Brown | 2014-12-18 17:27:37 +0100 |
commit | d1894970dbb727805acffd9320e3755fab289fe1 (patch) | |
tree | 26bc88c7e09b04475252f47a05bce46d33c64a6b | |
parent | [hyperv] Add support for Hyper-V hypervisor (diff) | |
download | ipxe-d1894970dbb727805acffd9320e3755fab289fe1.tar.gz ipxe-d1894970dbb727805acffd9320e3755fab289fe1.tar.xz ipxe-d1894970dbb727805acffd9320e3755fab289fe1.zip |
[hyperv] Add support for VMBus devices
Add support for an abstraction of a VMBus device.
Signed-off-by: Michael Brown <mcb30@ipxe.org>
-rw-r--r-- | src/Makefile | 1 | ||||
-rw-r--r-- | src/arch/x86/drivers/hyperv/hyperv.c | 8 | ||||
-rw-r--r-- | src/include/ipxe/device.h | 3 | ||||
-rw-r--r-- | src/include/ipxe/errfile.h | 1 | ||||
-rw-r--r-- | src/include/ipxe/hyperv.h | 2 | ||||
-rw-r--r-- | src/include/ipxe/vmbus.h | 629 | ||||
-rw-r--r-- | src/interface/hyperv/vmbus.c | 1261 |
7 files changed, 1905 insertions, 0 deletions
diff --git a/src/Makefile b/src/Makefile index b742d128..66d804b1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -86,6 +86,7 @@ SRCDIRS += drivers/infiniband SRCDIRS += interface/pxe interface/efi interface/smbios SRCDIRS += interface/bofm SRCDIRS += interface/xen +SRCDIRS += interface/hyperv SRCDIRS += tests SRCDIRS += crypto crypto/axtls crypto/matrixssl SRCDIRS += hci hci/commands hci/tui diff --git a/src/arch/x86/drivers/hyperv/hyperv.c b/src/arch/x86/drivers/hyperv/hyperv.c index bd68d051..39054407 100644 --- a/src/arch/x86/drivers/hyperv/hyperv.c +++ b/src/arch/x86/drivers/hyperv/hyperv.c @@ -38,6 +38,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include <ipxe/cpuid.h> #include <ipxe/msr.h> #include <ipxe/hyperv.h> +#include <ipxe/vmbus.h> #include "hyperv.h" /** Maximum time to wait for a message response @@ -506,9 +507,15 @@ static int hv_probe ( struct root_device *rootdev ) { if ( ( rc = hv_map_synic ( hv ) ) != 0 ) goto err_map_synic; + /* Probe Hyper-V devices */ + if ( ( rc = vmbus_probe ( hv, &rootdev->dev ) ) != 0 ) + goto err_vmbus_probe; + rootdev_set_drvdata ( rootdev, hv ); return 0; + vmbus_remove ( hv, &rootdev->dev ); + err_vmbus_probe: hv_unmap_synic ( hv ); err_map_synic: hv_unmap_hypercall ( hv ); @@ -532,6 +539,7 @@ static int hv_probe ( struct root_device *rootdev ) { static void hv_remove ( struct root_device *rootdev ) { struct hv_hypervisor *hv = rootdev_get_drvdata ( rootdev ); + vmbus_remove ( hv, &rootdev->dev ); hv_unmap_synic ( hv ); hv_unmap_hypercall ( hv ); hv_free_message ( hv ); diff --git a/src/include/ipxe/device.h b/src/include/ipxe/device.h index d1420b1a..fd17b252 100644 --- a/src/include/ipxe/device.h +++ b/src/include/ipxe/device.h @@ -63,6 +63,9 @@ struct device_description { /** Xen bus type */ #define BUS_TYPE_XEN 8 +/** Hyper-V bus type */ +#define BUS_TYPE_HV 9 + /** A hardware device */ struct device { /** Name */ diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h index fe20b516..a8babdf3 100644 --- a/src/include/ipxe/errfile.h +++ b/src/include/ipxe/errfile.h @@ -309,6 +309,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #define ERRFILE_xengrant ( ERRFILE_OTHER | 0x00440000 ) #define ERRFILE_efi_utils ( ERRFILE_OTHER | 0x00450000 ) #define ERRFILE_efi_wrap ( ERRFILE_OTHER | 0x00460000 ) +#define ERRFILE_vmbus ( ERRFILE_OTHER | 0x00470000 ) /** @} */ diff --git a/src/include/ipxe/hyperv.h b/src/include/ipxe/hyperv.h index 10be1ca8..4605b57e 100644 --- a/src/include/ipxe/hyperv.h +++ b/src/include/ipxe/hyperv.h @@ -197,6 +197,8 @@ struct hv_hypervisor { struct hv_synic synic; /** Message buffer */ union hv_message_buffer *message; + /** Virtual machine bus */ + struct vmbus *vmbus; }; #include <bits/hyperv.h> diff --git a/src/include/ipxe/vmbus.h b/src/include/ipxe/vmbus.h new file mode 100644 index 00000000..14c66a5d --- /dev/null +++ b/src/include/ipxe/vmbus.h @@ -0,0 +1,629 @@ +#ifndef _IPXE_VMBUS_H +#define _IPXE_VMBUS_H + +/** @file + * + * Hyper-V virtual machine bus + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include <byteswap.h> +#include <ipxe/uuid.h> +#include <ipxe/device.h> +#include <ipxe/tables.h> +#include <ipxe/uaccess.h> +#include <ipxe/iobuf.h> +#include <ipxe/hyperv.h> + +/** VMBus message connection ID */ +#define VMBUS_MESSAGE_ID 1 + +/** VMBus event connection ID */ +#define VMBUS_EVENT_ID 2 + +/** VMBus message type */ +#define VMBUS_MESSAGE_TYPE 1 + +/** VMBus message synthetic interrupt */ +#define VMBUS_MESSAGE_SINT 2 + +/** VMBus version number */ +union vmbus_version { + /** Raw version */ + uint32_t raw; + /** Major/minor version */ + struct { + /** Minor version */ + uint16_t minor; + /** Major version */ + uint16_t major; + }; +} __attribute__ (( packed )); + +/** Oldest known VMBus protocol version (Windows Server 2008) */ +#define VMBUS_VERSION_WS2008 ( ( 0 << 16 ) | ( 13 << 0 ) ) + +/** Guest physical address range descriptor */ +struct vmbus_gpa_range { + /** Byte count */ + uint32_t len; + /** Starting byte offset */ + uint32_t offset; + /** Page frame numbers + * + * The length of this array is implied by the byte count and + * starting offset. + */ + uint64_t pfn[0]; +} __attribute__ (( packed )); + +/** VMBus message header */ +struct vmbus_message_header { + /** Message type */ + uint32_t type; + /** Reserved */ + uint32_t reserved; +} __attribute__ (( packed )); + +/** VMBus message types */ +enum vmbus_message_type { + VMBUS_OFFER_CHANNEL = 1, + VMBUS_REQUEST_OFFERS = 3, + VMBUS_ALL_OFFERS_DELIVERED = 4, + VMBUS_OPEN_CHANNEL = 5, + VMBUS_OPEN_CHANNEL_RESULT = 6, + VMBUS_CLOSE_CHANNEL = 7, + VMBUS_GPADL_HEADER = 8, + VMBUS_GPADL_CREATED = 10, + VMBUS_GPADL_TEARDOWN = 11, + VMBUS_GPADL_TORNDOWN = 12, + VMBUS_INITIATE_CONTACT = 14, + VMBUS_VERSION_RESPONSE = 15, + VMBUS_UNLOAD = 16, +}; + +/** VMBus "offer channel" message */ +struct vmbus_offer_channel { + /** Message header */ + struct vmbus_message_header header; + /** Channel type */ + union uuid type; + /** Channel instance */ + union uuid instance; + /** Reserved */ + uint8_t reserved_a[16]; + /** Flags */ + uint16_t flags; + /** Reserved */ + uint8_t reserved_b[2]; + /** User data */ + uint8_t data[120]; + /** Reserved */ + uint8_t reserved_c[4]; + /** Channel ID */ + uint32_t channel; + /** Monitor ID */ + uint8_t monitor; + /** Monitor exists */ + uint8_t monitored; + /** Reserved */ + uint8_t reserved[2]; + /** Connection ID */ + uint32_t connection; +} __attribute__ (( packed )); + +/** VMBus "open channel" message */ +struct vmbus_open_channel { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; + /** Open ID */ + uint32_t id; + /** Ring buffer GPADL ID */ + uint32_t gpadl; + /** Reserved */ + uint32_t reserved; + /** Outbound ring buffer size (in pages) */ + uint32_t out_pages; + /** User-specific data */ + uint8_t data[120]; +} __attribute__ (( packed )); + +/** VMBus "open channel result" message */ +struct vmbus_open_channel_result { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; + /** Open ID */ + uint32_t id; + /** Status */ + uint32_t status; +} __attribute__ (( packed )); + +/** VMBus "close channel" message */ +struct vmbus_close_channel { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; +} __attribute__ (( packed )); + +/** VMBus "GPADL header" message */ +struct vmbus_gpadl_header { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; + /** GPADL ID */ + uint32_t gpadl; + /** Length of range descriptors */ + uint16_t range_len; + /** Number of range descriptors */ + uint16_t range_count; + /** Range descriptors */ + struct vmbus_gpa_range range[0]; +} __attribute__ (( packed )); + +/** VMBus "GPADL created" message */ +struct vmbus_gpadl_created { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; + /** GPADL ID */ + uint32_t gpadl; + /** Creation status */ + uint32_t status; +} __attribute__ (( packed )); + +/** VMBus "GPADL teardown" message */ +struct vmbus_gpadl_teardown { + /** Message header */ + struct vmbus_message_header header; + /** Channel ID */ + uint32_t channel; + /** GPADL ID */ + uint32_t gpadl; +} __attribute__ (( packed )); + +/** VMBus "GPADL torndown" message */ +struct vmbus_gpadl_torndown { + /** Message header */ + struct vmbus_message_header header; + /** GPADL ID */ + uint32_t gpadl; +} __attribute__ (( packed )); + +/** VMBus "initiate contact" message */ +struct vmbus_initiate_contact { + /** Message header */ + struct vmbus_message_header header; + /** Requested version */ + union vmbus_version version; + /** Target virtual CPU */ + uint32_t vcpu; + /** Interrupt page base address */ + uint64_t intr; + /** Parent to child monitor page base address */ + uint64_t monitor_in; + /** Child to parent monitor page base address */ + uint64_t monitor_out; +} __attribute__ (( packed )); + +/** VMBus "version response" message */ +struct vmbus_version_response { + /** Message header */ + struct vmbus_message_header header; + /** Version is supported */ + uint8_t supported; + /** Reserved */ + uint8_t reserved[3]; + /** Version */ + union vmbus_version version; +} __attribute__ (( packed )); + +/** VMBus message */ +union vmbus_message { + /** Common message header */ + struct vmbus_message_header header; + /** "Offer channel" message */ + struct vmbus_offer_channel offer; + /** "Open channel" message */ + struct vmbus_open_channel open; + /** "Open channel result" message */ + struct vmbus_open_channel_result opened; + /** "Close channel" message */ + struct vmbus_close_channel close; + /** "GPADL header" message */ + struct vmbus_gpadl_header gpadlhdr; + /** "GPADL created" message */ + struct vmbus_gpadl_created created; + /** "GPADL teardown" message */ + struct vmbus_gpadl_teardown teardown; + /** "GPADL torndown" message */ + struct vmbus_gpadl_torndown torndown; + /** "Initiate contact" message */ + struct vmbus_initiate_contact initiate; + /** "Version response" message */ + struct vmbus_version_response version; +}; + +/** VMBus packet header */ +struct vmbus_packet_header { + /** Type */ + uint16_t type; + /** Length of packet header (in quadwords) */ + uint16_t hdr_qlen; + /** Length of packet (in quadwords) */ + uint16_t qlen; + /** Flags */ + uint16_t flags; + /** Transaction ID + * + * This is an opaque token: we therefore treat it as + * native-endian and don't worry about byte-swapping. + */ + uint64_t xid; +} __attribute__ (( packed )); + +/** VMBus packet types */ +enum vmbus_packet_type { + VMBUS_DATA_INBAND = 6, + VMBUS_DATA_XFER_PAGES = 7, + VMBUS_DATA_GPA_DIRECT = 9, + VMBUS_CANCELLATION = 10, + VMBUS_COMPLETION = 11, +}; + +/** VMBus packet flags */ +enum vmbus_packet_flags { + VMBUS_COMPLETION_REQUESTED = 0x0001, +}; + +/** VMBus GPA direct header */ +struct vmbus_gpa_direct_header { + /** Packet header */ + struct vmbus_packet_header header; + /** Reserved */ + uint32_t reserved; + /** Number of range descriptors */ + uint32_t range_count; + /** Range descriptors */ + struct vmbus_gpa_range range[0]; +} __attribute__ (( packed )); + +/** VMBus transfer page range */ +struct vmbus_xfer_page_range { + /** Length */ + uint32_t len; + /** Offset */ + uint32_t offset; +} __attribute__ (( packed )); + +/** VMBus transfer page header */ +struct vmbus_xfer_page_header { + /** Packet header */ + struct vmbus_packet_header header; + /** Page set ID */ + uint16_t pageset; + /** Sender owns page set */ + uint8_t owner; + /** Reserved */ + uint8_t reserved; + /** Number of range descriptors */ + uint32_t range_count; + /** Range descriptors */ + struct vmbus_xfer_page_range range[0]; +} __attribute__ (( packed )); + +/** Maximum expected size of VMBus packet header */ +#define VMBUS_PACKET_MAX_HEADER_LEN 64 + +/** VMBus maximum-sized packet header */ +union vmbus_packet_header_max { + /** Common header */ + struct vmbus_packet_header header; + /** GPA direct header */ + struct vmbus_gpa_direct_header gpa; + /** Transfer page header */ + struct vmbus_xfer_page_header xfer; + /** Padding to maximum supported size */ + uint8_t padding[VMBUS_PACKET_MAX_HEADER_LEN]; +} __attribute__ (( packed )); + +/** VMBus packet footer */ +struct vmbus_packet_footer { + /** Reserved */ + uint32_t reserved; + /** Producer index of the first byte of the packet */ + uint32_t prod; +} __attribute__ (( packed )); + +/** VMBus ring buffer + * + * This is the structure of the each of the ring buffers created when + * a VMBus channel is opened. + */ +struct vmbus_ring { + /** Producer index (modulo ring length) */ + uint32_t prod; + /** Consumer index (modulo ring length) */ + uint32_t cons; + /** Interrupt mask */ + uint32_t intr_mask; + /** Reserved */ + uint8_t reserved[4084]; + /** Ring buffer contents */ + uint8_t data[0]; +} __attribute__ (( packed )); + +/** VMBus interrupt page */ +struct vmbus_interrupt { + /** Inbound interrupts */ + uint8_t in[ PAGE_SIZE / 2 ]; + /** Outbound interrupts */ + uint8_t out[ PAGE_SIZE / 2 ]; +} __attribute__ (( packed )); + +/** A virtual machine bus */ +struct vmbus { + /** Interrupt page */ + struct vmbus_interrupt *intr; + /** Inbound notifications */ + struct hv_monitor *monitor_in; + /** Outbound notifications */ + struct hv_monitor *monitor_out; + /** Received message buffer */ + const union vmbus_message *message; +}; + +struct vmbus_device; + +/** VMBus channel operations */ +struct vmbus_channel_operations { + /** + * Handle received control packet + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @v data Data + * @v len Length of data + * @ret rc Return status code + */ + int ( * recv_control ) ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ); + /** + * Handle received data packet + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @v data Data + * @v len Length of data + * @v iobuf I/O buffer, or NULL if allocation failed + * @ret rc Return status code + * + * This function takes ownership of the I/O buffer. It should + * eventually call vmbus_send_completion() to indicate to the + * host that the buffer can be reused. + * + * Note that this function will be called even if we failed to + * allocate or populate the I/O buffer; this is to allow for a + * completion to be sent even in the event of a transient + * memory shortage failure. + */ + int ( * recv_data ) ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len, + struct io_buffer *iobuf ); + /** + * Handle received completion packet + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @v data Data + * @v len Length of data + * @ret rc Return status code + */ + int ( * recv_completion ) ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ); + /** + * Handle received cancellation packet + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @ret rc Return status code + */ + int ( * recv_cancellation ) ( struct vmbus_device *vmdev, + uint64_t xid ); +}; + +struct vmbus_xfer_pages; + +/** VMBus transfer page set operations */ +struct vmbus_xfer_pages_operations { + /** + * Copy data from transfer page + * + * @v pages Transfer page set + * @v data Data buffer + * @v offset Offset within page set + * @v len Length within page set + * @ret rc Return status code + */ + int ( * copy ) ( struct vmbus_xfer_pages *pages, void *data, + size_t offset, size_t len ); +}; + +/** VMBus transfer page set */ +struct vmbus_xfer_pages { + /** List of all transfer page sets */ + struct list_head list; + /** Page set ID (in protocol byte order) */ + uint16_t pageset; + /** Page set operations */ + struct vmbus_xfer_pages_operations *op; +}; + +/** A VMBus device */ +struct vmbus_device { + /** Generic iPXE device */ + struct device dev; + /** Hyper-V hypervisor */ + struct hv_hypervisor *hv; + + /** Channel ID */ + unsigned int channel; + /** Monitor ID */ + unsigned int monitor; + /** Signal channel + * + * @v vmdev VMBus device + */ + void ( * signal ) ( struct vmbus_device *vmdev ); + + /** Outbound ring buffer length */ + uint32_t out_len; + /** Inbound ring buffer length */ + uint32_t in_len; + /** Outbound ring buffer */ + struct vmbus_ring *out; + /** Inbound ring buffer */ + struct vmbus_ring *in; + /** Ring buffer GPADL ID */ + unsigned int gpadl; + + /** Channel operations */ + struct vmbus_channel_operations *op; + /** Maximum expected data packet length */ + size_t mtu; + /** Packet buffer */ + void *packet; + /** List of transfer page sets */ + struct list_head pages; + + /** Driver */ + struct vmbus_driver *driver; + /** Driver-private data */ + void *priv; +}; + +/** A VMBus device driver */ +struct vmbus_driver { + /** Name */ + const char *name; + /** Device type */ + union uuid type; + /** Probe device + * + * @v vmdev VMBus device + * @ret rc Return status code + */ + int ( * probe ) ( struct vmbus_device *vmdev ); + /** Remove device + * + * @v vmdev VMBus device + */ + void ( * remove ) ( struct vmbus_device *vmdev ); +}; + +/** VMBus device driver table */ +#define VMBUS_DRIVERS __table ( struct vmbus_driver, "vmbus_drivers" ) + +/** Declare a VMBus device driver */ +#define __vmbus_driver __table_entry ( VMBUS_DRIVERS, 01 ) + +/** + * Set VMBus device driver-private data + * + * @v vmdev VMBus device + * @v priv Private data + */ +static inline void vmbus_set_drvdata ( struct vmbus_device *vmdev, void *priv ){ + vmdev->priv = priv; +} + +/** + * Get VMBus device driver-private data + * + * @v vmdev VMBus device + * @ret priv Private data + */ +static inline void * vmbus_get_drvdata ( struct vmbus_device *vmdev ) { + return vmdev->priv; +} + +/** Construct VMBus type */ +#define VMBUS_TYPE( a, b, c, d, e0, e1, e2, e3, e4, e5 ) { \ + .canonical = { \ + cpu_to_le32 ( a ), cpu_to_le16 ( b ), \ + cpu_to_le16 ( c ), cpu_to_be16 ( d ), \ + { e0, e1, e2, e3, e4, e5 } \ + } } + +/** + * Check if data is present in ring buffer + * + * @v vmdev VMBus device + * @v has_data Data is present + */ +static inline __attribute__ (( always_inline )) int +vmbus_has_data ( struct vmbus_device *vmdev ) { + + return ( vmdev->in->prod != vmdev->in->cons ); +} + +/** + * Register transfer page set + * + * @v vmdev VMBus device + * @v pages Transfer page set + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +vmbus_register_pages ( struct vmbus_device *vmdev, + struct vmbus_xfer_pages *pages ) { + + list_add ( &pages->list, &vmdev->pages ); + return 0; +} + +/** + * Unregister transfer page set + * + * @v vmdev VMBus device + * @v pages Transfer page set + */ +static inline __attribute__ (( always_inline )) void +vmbus_unregister_pages ( struct vmbus_device *vmdev, + struct vmbus_xfer_pages *pages ) { + + list_check_contains_entry ( pages, &vmdev->pages, list ); + list_del ( &pages->list ); +} + +extern int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data, + size_t len ); +extern int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, + unsigned int gpadl ); +extern int vmbus_open ( struct vmbus_device *vmdev, + struct vmbus_channel_operations *op, + size_t out_len, size_t in_len, size_t mtu ); +extern void vmbus_close ( struct vmbus_device *vmdev ); +extern int vmbus_send_control ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ); +extern int vmbus_send_data ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len, + struct io_buffer *iobuf ); +extern int vmbus_send_completion ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ); +extern int vmbus_send_cancellation ( struct vmbus_device *vmdev, uint64_t xid ); +extern int vmbus_poll ( struct vmbus_device *vmdev ); +extern void vmbus_dump_channel ( struct vmbus_device *vmdev ); + +extern int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ); +extern void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent ); + +#endif /* _IPXE_VMBUS_H */ diff --git a/src/interface/hyperv/vmbus.c b/src/interface/hyperv/vmbus.c new file mode 100644 index 00000000..1326ac9e --- /dev/null +++ b/src/interface/hyperv/vmbus.c @@ -0,0 +1,1261 @@ +/* + * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * Hyper-V virtual machine bus + * + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <byteswap.h> +#include <ipxe/nap.h> +#include <ipxe/malloc.h> +#include <ipxe/iobuf.h> +#include <ipxe/hyperv.h> +#include <ipxe/vmbus.h> + +/** Chosen VMBus protocol version + * + * This is a policy decision. We use the oldest common version in + * order to avoid including any version-specific code. + */ +#define VMBUS_VERSION VMBUS_VERSION_WS2008 + +/** VMBus initial GPADL ID + * + * This is an opaque value with no meaning. The Linux kernel uses + * 0xe1e10. + */ +#define VMBUS_GPADL_MAGIC 0x18ae0000 + +/** + * Post message + * + * @v hv Hyper-V hypervisor + * @v header Message header + * @v len Length of message (including header) + * @ret rc Return status code + */ +static int vmbus_post_message ( struct hv_hypervisor *hv, + const struct vmbus_message_header *header, + size_t len ) { + struct vmbus *vmbus = hv->vmbus; + int rc; + + /* Post message */ + if ( ( rc = hv_post_message ( hv, VMBUS_MESSAGE_ID, VMBUS_MESSAGE_TYPE, + header, len ) ) != 0 ) { + DBGC ( vmbus, "VMBUS %p could not post message: %s\n", + vmbus, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Post empty message + * + * @v hv Hyper-V hypervisor + * @v type Message type + * @ret rc Return status code + */ +static int vmbus_post_empty_message ( struct hv_hypervisor *hv, + unsigned int type ) { + struct vmbus_message_header header = { .type = cpu_to_le32 ( type ) }; + + return vmbus_post_message ( hv, &header, sizeof ( header ) ); +} + +/** + * Wait for received message + * + * @v hv Hyper-V hypervisor + * @ret rc Return status code + */ +static int vmbus_wait_for_message ( struct hv_hypervisor *hv ) { + struct vmbus *vmbus = hv->vmbus; + int rc; + + /* Wait for message */ + if ( ( rc = hv_wait_for_message ( hv, VMBUS_MESSAGE_SINT ) ) != 0 ) { + DBGC ( vmbus, "VMBUS %p failed waiting for message: %s\n", + vmbus, strerror ( rc ) ); + return rc; + } + + /* Sanity check */ + if ( hv->message->received.type != cpu_to_le32 ( VMBUS_MESSAGE_TYPE ) ){ + DBGC ( vmbus, "VMBUS %p invalid message type %d\n", + vmbus, le32_to_cpu ( hv->message->received.type ) ); + return -EINVAL; + } + + return 0; +} + +/** + * Initiate contact + * + * @v hv Hyper-V hypervisor + * @ret rc Return status code + */ +static int vmbus_initiate_contact ( struct hv_hypervisor *hv ) { + struct vmbus *vmbus = hv->vmbus; + const struct vmbus_version_response *version = &vmbus->message->version; + struct vmbus_initiate_contact initiate; + int rc; + + /* Construct message */ + memset ( &initiate, 0, sizeof ( initiate ) ); + initiate.header.type = cpu_to_le32 ( VMBUS_INITIATE_CONTACT ); + initiate.version.raw = cpu_to_le32 ( VMBUS_VERSION ); + initiate.intr = virt_to_phys ( vmbus->intr ); + initiate.monitor_in = virt_to_phys ( vmbus->monitor_in ); + initiate.monitor_out = virt_to_phys ( vmbus->monitor_out ); + + /* Post message */ + if ( ( rc = vmbus_post_message ( hv, &initiate.header, + sizeof ( initiate ) ) ) != 0 ) + return rc; + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 ) + return rc; + + /* Check response */ + if ( version->header.type != cpu_to_le32 ( VMBUS_VERSION_RESPONSE ) ) { + DBGC ( vmbus, "VMBUS %p unexpected version response type %d\n", + vmbus, le32_to_cpu ( version->header.type ) ); + return -EPROTO; + } + if ( ! version->supported ) { + DBGC ( vmbus, "VMBUS %p requested version not supported\n", + vmbus ); + return -ENOTSUP; + } + if ( version->version.raw != cpu_to_le32 ( VMBUS_VERSION ) ) { + DBGC ( vmbus, "VMBUS %p unexpected version %d.%d\n", + vmbus, le16_to_cpu ( version->version.major ), + le16_to_cpu ( version->version.minor ) ); + return -EPROTO; + } + + DBGC ( vmbus, "VMBUS %p initiated contact using version %d.%d\n", + vmbus, le16_to_cpu ( version->version.major ), + le16_to_cpu ( version->version.minor ) ); + return 0; +} + +/** + * Terminate contact + * + * @v hv Hyper-V hypervisor + * @ret rc Return status code + */ +static int vmbus_unload ( struct hv_hypervisor *hv ) { + + return vmbus_post_empty_message ( hv, VMBUS_UNLOAD ); +} + +/** + * Establish GPA descriptor list + * + * @v vmdev VMBus device + * @v data Data buffer + * @v len Length of data buffer + * @ret gpadl GPADL ID, or negative error + */ +int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data, + size_t len ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus *vmbus = hv->vmbus; + physaddr_t addr = user_to_phys ( data, 0 ); + unsigned int pfn_count = hv_pfn_count ( addr, len ); + struct { + struct vmbus_gpadl_header gpadlhdr; + struct vmbus_gpa_range range; + uint64_t pfn[pfn_count]; + } __attribute__ (( packed )) gpadlhdr; + const struct vmbus_gpadl_created *created = &vmbus->message->created; + static unsigned int gpadl = VMBUS_GPADL_MAGIC; + unsigned int i; + int rc; + + /* Allocate GPADL ID */ + gpadl++; + + /* Construct message */ + memset ( &gpadlhdr, 0, sizeof ( gpadlhdr ) ); + gpadlhdr.gpadlhdr.header.type = cpu_to_le32 ( VMBUS_GPADL_HEADER ); + gpadlhdr.gpadlhdr.channel = cpu_to_le32 ( vmdev->channel ); + gpadlhdr.gpadlhdr.gpadl = cpu_to_le32 ( gpadl ); + gpadlhdr.gpadlhdr.range_len = + cpu_to_le16 ( ( sizeof ( gpadlhdr.range ) + + sizeof ( gpadlhdr.pfn ) ) ); + gpadlhdr.gpadlhdr.range_count = cpu_to_le16 ( 1 ); + gpadlhdr.range.len = cpu_to_le32 ( len ); + gpadlhdr.range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) ); + for ( i = 0 ; i < pfn_count ; i++ ) + gpadlhdr.pfn[i] = ( ( addr / PAGE_SIZE ) + i ); + + /* Post message */ + if ( ( rc = vmbus_post_message ( hv, &gpadlhdr.gpadlhdr.header, + sizeof ( gpadlhdr ) ) ) != 0 ) + return rc; + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 ) + return rc; + + /* Check response */ + if ( created->header.type != cpu_to_le32 ( VMBUS_GPADL_CREATED ) ) { + DBGC ( vmdev, "VMBUS %s unexpected GPADL response type %d\n", + vmdev->dev.name, le32_to_cpu ( created->header.type ) ); + return -EPROTO; + } + if ( created->channel != cpu_to_le32 ( vmdev->channel ) ) { + DBGC ( vmdev, "VMBUS %s unexpected GPADL channel %d\n", + vmdev->dev.name, le32_to_cpu ( created->channel ) ); + return -EPROTO; + } + if ( created->gpadl != cpu_to_le32 ( gpadl ) ) { + DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n", + vmdev->dev.name, le32_to_cpu ( created->gpadl ) ); + return -EPROTO; + } + if ( created->status != 0 ) { + DBGC ( vmdev, "VMBUS %s GPADL creation failed: %#08x\n", + vmdev->dev.name, le32_to_cpu ( created->status ) ); + return -EPROTO; + } + + DBGC ( vmdev, "VMBUS %s GPADL %#08x is [%08lx,%08lx)\n", + vmdev->dev.name, gpadl, addr, ( addr + len ) ); + return gpadl; +} + +/** + * Tear down GPA descriptor list + * + * @v vmdev VMBus device + * @v gpadl GPADL ID + * @ret rc Return status code + */ +int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, unsigned int gpadl ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus *vmbus = hv->vmbus; + struct vmbus_gpadl_teardown teardown; + const struct vmbus_gpadl_torndown *torndown = &vmbus->message->torndown; + int rc; + + /* Construct message */ + memset ( &teardown, 0, sizeof ( teardown ) ); + teardown.header.type = cpu_to_le32 ( VMBUS_GPADL_TEARDOWN ); + teardown.channel = cpu_to_le32 ( vmdev->channel ); + teardown.gpadl = cpu_to_le32 ( gpadl ); + + /* Post message */ + if ( ( rc = vmbus_post_message ( hv, &teardown.header, + sizeof ( teardown ) ) ) != 0 ) + return rc; + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 ) + return rc; + + /* Check response */ + if ( torndown->header.type != cpu_to_le32 ( VMBUS_GPADL_TORNDOWN ) ) { + DBGC ( vmdev, "VMBUS %s unexpected GPADL response type %d\n", + vmdev->dev.name, le32_to_cpu ( torndown->header.type ) ); + return -EPROTO; + } + if ( torndown->gpadl != cpu_to_le32 ( gpadl ) ) { + DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n", + vmdev->dev.name, le32_to_cpu ( torndown->gpadl ) ); + return -EPROTO; + } + + return 0; +} + +/** + * Open VMBus channel + * + * @v vmdev VMBus device + * @v op Channel operations + * @v out_len Outbound ring buffer length + * @v in_len Inbound ring buffer length + * @v mtu Maximum expected data packet length (including headers) + * @ret rc Return status code + * + * Both outbound and inbound ring buffer lengths must be a power of + * two and a multiple of PAGE_SIZE. The requirement to be a power of + * two is a policy decision taken to simplify the ring buffer indexing + * logic. + */ +int vmbus_open ( struct vmbus_device *vmdev, + struct vmbus_channel_operations *op, + size_t out_len, size_t in_len, size_t mtu ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus *vmbus = hv->vmbus; + struct vmbus_open_channel open; + const struct vmbus_open_channel_result *opened = + &vmbus->message->opened; + size_t len; + void *ring; + void *packet; + int gpadl; + uint32_t open_id; + int rc; + + /* Sanity checks */ + assert ( ( out_len % PAGE_SIZE ) == 0 ); + assert ( ( out_len & ( out_len - 1 ) ) == 0 ); + assert ( ( in_len % PAGE_SIZE ) == 0 ); + assert ( ( in_len & ( in_len - 1 ) ) == 0 ); + assert ( mtu >= ( sizeof ( struct vmbus_packet_header ) + + sizeof ( struct vmbus_packet_footer ) ) ); + + /* Allocate packet buffer */ + packet = malloc ( mtu ); + if ( ! packet ) { + rc = -ENOMEM; + goto err_alloc_packet; + } + + /* Allocate ring buffer */ + len = ( sizeof ( *vmdev->out ) + out_len + + sizeof ( *vmdev->in ) + in_len ); + assert ( ( len % PAGE_SIZE ) == 0 ); + ring = malloc_dma ( len, PAGE_SIZE ); + if ( ! ring ) { + rc = -ENOMEM; + goto err_alloc_ring; + } + memset ( ring, 0, len ); + + /* Establish GPADL for ring buffer */ + gpadl = vmbus_establish_gpadl ( vmdev, virt_to_user ( ring ), len ); + if ( gpadl < 0 ) { + rc = gpadl; + goto err_establish; + } + + /* Construct message */ + memset ( &open, 0, sizeof ( open ) ); + open.header.type = cpu_to_le32 ( VMBUS_OPEN_CHANNEL ); + open.channel = cpu_to_le32 ( vmdev->channel ); + open_id = random(); + open.id = open_id; /* Opaque random value: endianness irrelevant */ + open.gpadl = cpu_to_le32 ( gpadl ); + open.out_pages = ( ( sizeof ( *vmdev->out ) / PAGE_SIZE ) + + ( out_len / PAGE_SIZE ) ); + + /* Post message */ + if ( ( rc = vmbus_post_message ( hv, &open.header, + sizeof ( open ) ) ) != 0 ) + return rc; + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 ) + return rc; + + /* Check response */ + if ( opened->header.type != cpu_to_le32 ( VMBUS_OPEN_CHANNEL_RESULT ) ){ + DBGC ( vmdev, "VMBUS %s unexpected open response type %d\n", + vmdev->dev.name, le32_to_cpu ( opened->header.type ) ); + return -EPROTO; + } + if ( opened->channel != cpu_to_le32 ( vmdev->channel ) ) { + DBGC ( vmdev, "VMBUS %s unexpected opened channel %#08x\n", + vmdev->dev.name, le32_to_cpu ( opened->channel ) ); + return -EPROTO; + } + if ( opened->id != open_id /* Non-endian */ ) { + DBGC ( vmdev, "VMBUS %s unexpected open ID %#08x\n", + vmdev->dev.name, le32_to_cpu ( opened->id ) ); + return -EPROTO; + } + if ( opened->status != 0 ) { + DBGC ( vmdev, "VMBUS %s open failed: %#08x\n", + vmdev->dev.name, le32_to_cpu ( opened->status ) ); + return -EPROTO; + } + + /* Store channel parameters */ + vmdev->out_len = out_len; + vmdev->in_len = in_len; + vmdev->out = ring; + vmdev->in = ( ring + sizeof ( *vmdev->out ) + out_len ); + vmdev->gpadl = gpadl; + vmdev->op = op; + vmdev->mtu = mtu; + vmdev->packet = packet; + + DBGC ( vmdev, "VMBUS %s channel GPADL %#08x ring " + "[%#08lx,%#08lx,%#08lx)\n", vmdev->dev.name, vmdev->gpadl, + virt_to_phys ( vmdev->out ), virt_to_phys ( vmdev->in ), + ( virt_to_phys ( vmdev->out ) + len ) ); + return 0; + + vmbus_gpadl_teardown ( vmdev, vmdev->gpadl ); + err_establish: + free_dma ( ring, len ); + err_alloc_ring: + free ( packet ); + err_alloc_packet: + return rc; +} + +/** + * Close VMBus channel + * + * @v vmdev VMBus device + */ +void vmbus_close ( struct vmbus_device *vmdev ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus_close_channel close; + size_t len; + int rc; + + /* Construct message */ + memset ( &close, 0, sizeof ( close ) ); + close.header.type = cpu_to_le32 ( VMBUS_CLOSE_CHANNEL ); + close.channel = cpu_to_le32 ( vmdev->channel ); + + /* Post message */ + if ( ( rc = vmbus_post_message ( hv, &close.header, + sizeof ( close ) ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s failed to close: %s\n", + vmdev->dev.name, strerror ( rc ) ); + /* Continue to attempt to tear down GPADL, so that our + * memory is no longer accessible by the remote VM. + */ + } + + /* Tear down GPADL */ + if ( ( rc = vmbus_gpadl_teardown ( vmdev, + vmdev->gpadl ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s failed to tear down channel GPADL: " + "%s\n", vmdev->dev.name, strerror ( rc ) ); + /* We can't prevent the remote VM from continuing to + * access this memory, so leak it. + */ + return; + } + + /* Free ring buffer */ + len = ( sizeof ( *vmdev->out ) + vmdev->out_len + + sizeof ( *vmdev->in ) + vmdev->in_len ); + free_dma ( vmdev->out, len ); + vmdev->out = NULL; + vmdev->in = NULL; + + /* Free packet buffer */ + free ( vmdev->packet ); + vmdev->packet = NULL; + + DBGC ( vmdev, "VMBUS %s closed\n", vmdev->dev.name ); +} + +/** + * Signal channel via monitor page + * + * @v vmdev VMBus device + */ +static void vmbus_signal_monitor ( struct vmbus_device *vmdev ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus *vmbus = hv->vmbus; + struct hv_monitor_trigger *trigger; + unsigned int group; + unsigned int bit; + + /* Set bit in monitor trigger group */ + group = ( vmdev->monitor / ( 8 * sizeof ( trigger->pending ) )); + bit = ( vmdev->monitor % ( 8 * sizeof ( trigger->pending ) ) ); + trigger = &vmbus->monitor_out->trigger[group]; + hv_set_bit ( trigger, bit ); +} + +/** + * Signal channel via hypervisor event + * + * @v vmdev VMBus device + */ +static void vmbus_signal_event ( struct vmbus_device *vmdev ) { + struct hv_hypervisor *hv = vmdev->hv; + int rc; + + /* Signal hypervisor event */ + if ( ( rc = hv_signal_event ( hv, VMBUS_EVENT_ID, 0 ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not signal event: %s\n", + vmdev->dev.name, strerror ( rc ) ); + return; + } +} + +/** + * Fill outbound ring buffer + * + * @v vmdev VMBus device + * @v prod Producer index + * @v data Data + * @v len Length + * @ret prod New producer index + * + * The caller must ensure that there is sufficient space in the ring + * buffer. + */ +static size_t vmbus_produce ( struct vmbus_device *vmdev, size_t prod, + const void *data, size_t len ) { + size_t first; + size_t second; + + /* Determine fragment lengths */ + first = ( vmdev->out_len - prod ); + if ( first > len ) + first = len; + second = ( len - first ); + + /* Copy fragment(s) */ + memcpy ( &vmdev->out->data[prod], data, first ); + if ( second ) + memcpy ( &vmdev->out->data[0], ( data + first ), second ); + + return ( ( prod + len ) & ( vmdev->out_len - 1 ) ); +} + +/** + * Consume inbound ring buffer + * + * @v vmdev VMBus device + * @v cons Consumer index + * @v data Data buffer, or NULL + * @v len Length to consume + * @ret cons New consumer index + */ +static size_t vmbus_consume ( struct vmbus_device *vmdev, size_t cons, + void *data, size_t len ) { + size_t first; + size_t second; + + /* Determine fragment lengths */ + first = ( vmdev->in_len - cons ); + if ( first > len ) + first = len; + second = ( len - first ); + + /* Copy fragment(s) */ + memcpy ( data, &vmdev->in->data[cons], first ); + if ( second ) + memcpy ( ( data + first ), &vmdev->in->data[0], second ); + + return ( ( cons + len ) & ( vmdev->in_len - 1 ) ); +} + +/** + * Send packet via ring buffer + * + * @v vmdev VMBus device + * @v header Packet header + * @v data Data + * @v len Length of data + * @ret rc Return status code + * + * Send a packet via the outbound ring buffer. All fields in the + * packet header must be filled in, with the exception of the total + * packet length. + */ +static int vmbus_send ( struct vmbus_device *vmdev, + struct vmbus_packet_header *header, + const void *data, size_t len ) { + struct hv_hypervisor *hv = vmdev->hv; + struct vmbus *vmbus = hv->vmbus; + static uint8_t padding[ 8 - 1 ]; + struct vmbus_packet_footer footer; + size_t header_len; + size_t pad_len; + size_t footer_len; + size_t ring_len; + size_t cons; + size_t prod; + size_t old_prod; + size_t fill; + + /* Sanity check */ + assert ( vmdev->out != NULL ); + + /* Calculate lengths */ + header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 ); + pad_len = ( ( -len ) & ( 8 - 1 ) ); + footer_len = sizeof ( footer ); + ring_len = ( header_len + len + pad_len + footer_len ); + + /* Check that we have enough room in the outbound ring buffer */ + cons = le32_to_cpu ( vmdev->out->cons ); + prod = le32_to_cpu ( vmdev->out->prod ); + old_prod = prod; + fill = ( ( prod - cons ) & ( vmdev->out_len - 1 ) ); + if ( ( fill + ring_len ) >= vmdev->out_len ) { + DBGC ( vmdev, "VMBUS %s ring buffer full\n", vmdev->dev.name ); + return -ENOBUFS; + } + + /* Complete header */ + header->qlen = cpu_to_le16 ( ( ring_len - footer_len ) / 8 ); + + /* Construct footer */ + footer.reserved = 0; + footer.prod = vmdev->out->prod; + + /* Copy packet to buffer */ + DBGC2 ( vmdev, "VMBUS %s sending:\n", vmdev->dev.name ); + DBGC2_HDA ( vmdev, prod, header, header_len ); + prod = vmbus_produce ( vmdev, prod, header, header_len ); + DBGC2_HDA ( vmdev, prod, data, len ); + prod = vmbus_produce ( vmdev, prod, data, len ); + prod = vmbus_produce ( vmdev, prod, padding, pad_len ); + DBGC2_HDA ( vmdev, prod, &footer, sizeof ( footer ) ); + prod = vmbus_produce ( vmdev, prod, &footer, sizeof ( footer ) ); + assert ( ( ( prod - old_prod ) & ( vmdev->out_len - 1 ) ) == ring_len ); + + /* Update producer index */ + wmb(); + vmdev->out->prod = cpu_to_le32 ( prod ); + + /* Return if we do not need to signal the host. This follows + * the logic of hv_need_to_signal() in the Linux driver. + */ + mb(); + if ( vmdev->out->intr_mask ) + return 0; + rmb(); + cons = le32_to_cpu ( vmdev->out->cons ); + if ( cons != old_prod ) + return 0; + + /* Set channel bit in interrupt page */ + hv_set_bit ( vmbus->intr->out, vmdev->channel ); + + /* Signal the host */ + vmdev->signal ( vmdev ); + + return 0; +} + +/** + * Send control packet via ring buffer + * + * @v vmdev VMBus device + * @v xid Transaction ID (or zero to not request completion) + * @v data Data + * @v len Length of data + * @ret rc Return status code + * + * Send data using a VMBUS_DATA_INBAND packet. + */ +int vmbus_send_control ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ) { + struct vmbus_packet_header *header = vmdev->packet; + + /* Construct header in packet buffer */ + assert ( header != NULL ); + header->type = cpu_to_le16 ( VMBUS_DATA_INBAND ); + header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 ); + header->flags = ( xid ? + cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED ) : 0 ); + header->xid = xid; /* Non-endian */ + + return vmbus_send ( vmdev, header, data, len ); +} + +/** + * Send data packet via ring buffer + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @v data Data + * @v len Length of data + * @v iobuf I/O buffer + * @ret rc Return status code + * + * Send data using a VMBUS_DATA_GPA_DIRECT packet. The caller is + * responsible for ensuring that the I/O buffer remains untouched + * until the corresponding completion has been received. + */ +int vmbus_send_data ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len, struct io_buffer *iobuf ) { + physaddr_t addr = virt_to_phys ( iobuf->data ); + unsigned int pfn_count = hv_pfn_count ( addr, iob_len ( iobuf ) ); + struct { + struct vmbus_gpa_direct_header gpa; + struct vmbus_gpa_range range; + uint64_t pfn[pfn_count]; + } __attribute__ (( packed )) *header = vmdev->packet; + unsigned int i; + + /* Sanity check */ + assert ( header != NULL ); + assert ( sizeof ( *header ) <= vmdev->mtu ); + + /* Construct header in packet buffer */ + header->gpa.header.type = cpu_to_le16 ( VMBUS_DATA_GPA_DIRECT ); + header->gpa.header.hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 ); + header->gpa.header.flags = cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED ); + header->gpa.header.xid = xid; /* Non-endian */ + header->gpa.range_count = 1; + header->range.len = cpu_to_le32 ( iob_len ( iobuf ) ); + header->range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) ); + for ( i = 0 ; i < pfn_count ; i++ ) + header->pfn[i] = ( ( addr / PAGE_SIZE ) + i ); + + return vmbus_send ( vmdev, &header->gpa.header, data, len ); +} + +/** + * Send completion packet via ring buffer + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @v data Data + * @v len Length of data + * @ret rc Return status code + * + * Send data using a VMBUS_COMPLETION packet. + */ +int vmbus_send_completion ( struct vmbus_device *vmdev, uint64_t xid, + const void *data, size_t len ) { + struct vmbus_packet_header *header = vmdev->packet; + + /* Construct header in packet buffer */ + assert ( header != NULL ); + header->type = cpu_to_le16 ( VMBUS_COMPLETION ); + header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 ); + header->flags = 0; + header->xid = xid; /* Non-endian */ + + return vmbus_send ( vmdev, header, data, len ); +} + +/** + * Send cancellation packet via ring buffer + * + * @v vmdev VMBus device + * @v xid Transaction ID + * @ret rc Return status code + * + * Send data using a VMBUS_CANCELLATION packet. + */ +int vmbus_send_cancellation ( struct vmbus_device *vmdev, uint64_t xid ) { + struct vmbus_packet_header *header = vmdev->packet; + + /* Construct header in packet buffer */ + assert ( header != NULL ); + header->type = cpu_to_le16 ( VMBUS_CANCELLATION ); + header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 ); + header->flags = 0; + header->xid = xid; /* Non-endian */ + + return vmbus_send ( vmdev, header, NULL, 0 ); +} + +/** + * Get transfer page set from pageset ID + * + * @v vmdev VMBus device + * @v pageset Page set ID (in protocol byte order) + * @ret pages Page set, or NULL if not found + */ +static struct vmbus_xfer_pages * vmbus_xfer_pages ( struct vmbus_device *vmdev, + uint16_t pageset ) { + struct vmbus_xfer_pages *pages; + + /* Locate page set */ + list_for_each_entry ( pages, &vmdev->pages, list ) { + if ( pages->pageset == pageset ) + return pages; + } + + DBGC ( vmdev, "VMBUS %s unrecognised page set ID %#04x\n", + vmdev->dev.name, le16_to_cpu ( pageset ) ); + return NULL; +} + +/** + * Construct I/O buffer from transfer pages + * + * @v vmdev VMBus device + * @v header Transfer page header + * @ret iobuf I/O buffer, or NULL on error + */ +static struct io_buffer * +vmbus_xfer_page_iobuf ( struct vmbus_device *vmdev, + struct vmbus_packet_header *header ) { + struct vmbus_xfer_page_header *page_header = + container_of ( header, struct vmbus_xfer_page_header, header ); + struct vmbus_xfer_pages *pages; + struct io_buffer *iobuf; + size_t total_len; + size_t len; + size_t offset; + unsigned int range_count; + unsigned int i; + int rc; + + /* Sanity check */ + assert ( header->type == cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) ); + + /* Locate page set */ + pages = vmbus_xfer_pages ( vmdev, page_header->pageset ); + if ( ! pages ) + goto err_pages; + + /* Determine total length */ + range_count = le32_to_cpu ( page_header->range_count ); + for ( total_len = 0, i = 0 ; i < range_count ; i++ ) { + len = le32_to_cpu ( page_header->range[i].len ); + total_len += len; + } + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( total_len ); + if ( ! iobuf ) { + DBGC ( vmdev, "VMBUS %s could not allocate %zd-byte I/O " + "buffer\n", vmdev->dev.name, total_len ); + goto err_alloc; + } + + /* Populate I/O buffer */ + for ( i = 0 ; i < range_count ; i++ ) { + len = le32_to_cpu ( page_header->range[i].len ); + offset = le32_to_cpu ( page_header->range[i].offset ); + if ( ( rc = pages->op->copy ( pages, iob_put ( iobuf, len ), + offset, len ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not populate I/O buffer " + "range [%zd,%zd): %s\n", + vmdev->dev.name, offset, len, strerror ( rc ) ); + goto err_copy; + } + } + + return iobuf; + + err_copy: + free_iob ( iobuf ); + err_alloc: + err_pages: + return NULL; +} + +/** + * Poll ring buffer + * + * @v vmdev VMBus device + * @ret rc Return status code + */ +int vmbus_poll ( struct vmbus_device *vmdev ) { + struct vmbus_packet_header *header = vmdev->packet; + struct io_buffer *iobuf; + void *data; + size_t header_len; + size_t len; + size_t footer_len; + size_t ring_len; + size_t cons; + size_t old_cons; + uint64_t xid; + int rc; + + /* Sanity checks */ + assert ( vmdev->packet != NULL ); + assert ( vmdev->in != NULL ); + + /* Return immediately if buffer is empty */ + if ( ! vmbus_has_data ( vmdev ) ) + return 0; + cons = le32_to_cpu ( vmdev->in->cons ); + old_cons = cons; + + /* Consume (start of) header */ + cons = vmbus_consume ( vmdev, cons, header, sizeof ( *header ) ); + + /* Parse and sanity check header */ + header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 ); + if ( header_len < sizeof ( *header ) ) { + DBGC ( vmdev, "VMBUS %s received underlength header (%zd " + "bytes)\n", vmdev->dev.name, header_len ); + return -EINVAL; + } + len = ( ( le16_to_cpu ( header->qlen ) * 8 ) - header_len ); + footer_len = sizeof ( struct vmbus_packet_footer ); + ring_len = ( header_len + len + footer_len ); + if ( ring_len > vmdev->mtu ) { + DBGC ( vmdev, "VMBUS %s received overlength packet (%zd " + "bytes)\n", vmdev->dev.name, ring_len ); + return -ERANGE; + } + xid = le64_to_cpu ( header->xid ); + + /* Consume remainder of packet */ + cons = vmbus_consume ( vmdev, cons, + ( ( ( void * ) header ) + sizeof ( *header ) ), + ( ring_len - sizeof ( *header ) ) ); + DBGC2 ( vmdev, "VMBUS %s received:\n", vmdev->dev.name ); + DBGC2_HDA ( vmdev, old_cons, header, ring_len ); + assert ( ( ( cons - old_cons ) & ( vmdev->in_len - 1 ) ) == ring_len ); + + /* Update producer index */ + rmb(); + vmdev->in->cons = cpu_to_le32 ( cons ); + + /* Handle packet */ + data = ( ( ( void * ) header ) + header_len ); + switch ( header->type ) { + + case cpu_to_le16 ( VMBUS_DATA_INBAND ) : + if ( ( rc = vmdev->op->recv_control ( vmdev, xid, data, + len ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not handle control " + "packet: %s\n", + vmdev->dev.name, strerror ( rc ) ); + return rc; + } + break; + + case cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) : + iobuf = vmbus_xfer_page_iobuf ( vmdev, header ); + /* Call recv_data() even if I/O buffer allocation + * failed, to allow for completions to be sent. + */ + if ( ( rc = vmdev->op->recv_data ( vmdev, xid, data, len, + iob_disown ( iobuf ) ) )!=0){ + DBGC ( vmdev, "VMBUS %s could not handle data packet: " + "%s\n", vmdev->dev.name, strerror ( rc ) ); + return rc; + } + break; + + case cpu_to_le16 ( VMBUS_COMPLETION ) : + if ( ( rc = vmdev->op->recv_completion ( vmdev, xid, data, + len ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not handle completion: " + "%s\n", vmdev->dev.name, strerror ( rc ) ); + return rc; + } + break; + + case cpu_to_le16 ( VMBUS_CANCELLATION ) : + if ( ( rc = vmdev->op->recv_cancellation ( vmdev, xid ) ) != 0){ + DBGC ( vmdev, "VMBUS %s could not handle cancellation: " + "%s\n", vmdev->dev.name, strerror ( rc ) ); + return rc; + } + break; + + default: + DBGC ( vmdev, "VMBUS %s unknown packet type %d\n", + vmdev->dev.name, le16_to_cpu ( header->type ) ); + return -ENOTSUP; + } + + return 0; +} + +/** + * Dump channel status (for debugging) + * + * @v vmdev VMBus device + */ +void vmbus_dump_channel ( struct vmbus_device *vmdev ) { + size_t out_prod = le32_to_cpu ( vmdev->out->prod ); + size_t out_cons = le32_to_cpu ( vmdev->out->cons ); + size_t in_prod = le32_to_cpu ( vmdev->in->prod ); + size_t in_cons = le32_to_cpu ( vmdev->in->cons ); + size_t in_len; + size_t first; + size_t second; + + /* Dump ring status */ + DBGC ( vmdev, "VMBUS %s out %03zx:%03zx%s in %03zx:%03zx%s\n", + vmdev->dev.name, out_prod, out_cons, + ( vmdev->out->intr_mask ? "(m)" : "" ), in_prod, in_cons, + ( vmdev->in->intr_mask ? "(m)" : "" ) ); + + /* Dump inbound ring contents, if any */ + if ( in_prod != in_cons ) { + in_len = ( ( in_prod - in_cons ) & + ( vmdev->in_len - 1 ) ); + first = ( vmdev->in_len - in_cons ); + if ( first > in_len ) + first = in_len; + second = ( in_len - first ); + DBGC_HDA ( vmdev, in_cons, &vmdev->in->data[in_cons], first ); + DBGC_HDA ( vmdev, 0, &vmdev->in->data[0], second ); + } +} + +/** + * Find driver for VMBus device + * + * @v vmdev VMBus device + * @ret driver Driver, or NULL + */ +static struct vmbus_driver * vmbus_find_driver ( const union uuid *type ) { + struct vmbus_driver *vmdrv; + + for_each_table_entry ( vmdrv, VMBUS_DRIVERS ) { + if ( memcmp ( &vmdrv->type, type, sizeof ( *type ) ) == 0 ) + return vmdrv; + } + return NULL; +} + +/** + * Probe channels + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + * @ret rc Return status code + */ +static int vmbus_probe_channels ( struct hv_hypervisor *hv, + struct device *parent ) { + struct vmbus *vmbus = hv->vmbus; + const struct vmbus_message_header *header = &vmbus->message->header; + const struct vmbus_offer_channel *offer = &vmbus->message->offer; + const union uuid *type; + struct vmbus_driver *driver; + struct vmbus_device *vmdev; + struct vmbus_device *tmp; + unsigned int channel; + int rc; + + /* Post message */ + if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0) + goto err_post_message; + + /* Collect responses */ + while ( 1 ) { + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_message ( hv ) ) != 0 ) + goto err_wait_for_message; + + /* Handle response */ + if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) { + + /* Parse offer */ + type = &offer->type; + channel = le32_to_cpu ( offer->channel ); + DBGC2 ( vmbus, "VMBUS %p offer %d type %s", + vmbus, channel, uuid_ntoa ( type ) ); + if ( offer->monitored ) + DBGC2 ( vmbus, " monitor %d", offer->monitor ); + DBGC2 ( vmbus, "\n" ); + + /* Look for a driver */ + driver = vmbus_find_driver ( type ); + if ( ! driver ) { + DBGC ( vmbus, "VMBUS %p has no driver for " + "type %s\n", vmbus, uuid_ntoa ( type ) ); + /* Not a fatal error */ + continue; + } + + /* Allocate and initialise device */ + vmdev = zalloc ( sizeof ( *vmdev ) ); + if ( ! vmdev ) { + rc = -ENOMEM; + goto err_alloc_vmdev; + } + snprintf ( vmdev->dev.name, sizeof ( vmdev->dev.name ), + "vmbus:%02x", channel ); + vmdev->dev.desc.bus_type = BUS_TYPE_HV; + INIT_LIST_HEAD ( &vmdev->dev.children ); + list_add_tail ( &vmdev->dev.siblings, + &parent->children ); + vmdev->dev.parent = parent; + vmdev->hv = hv; + vmdev->channel = channel; + vmdev->monitor = offer->monitor; + vmdev->signal = ( offer->monitored ? + vmbus_signal_monitor : + vmbus_signal_event ); + INIT_LIST_HEAD ( &vmdev->pages ); + vmdev->driver = driver; + vmdev->dev.driver_name = driver->name; + DBGC ( vmdev, "VMBUS %s has driver \"%s\"\n", + vmdev->dev.name, vmdev->driver->name ); + + } else if ( header->type == + cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) { + + break; + + } else { + DBGC ( vmbus, "VMBUS %p unexpected offer response type " + "%d\n", vmbus, le32_to_cpu ( header->type ) ); + rc = -EPROTO; + goto err_unexpected_offer; + } + } + + /* Probe all devices. We do this only after completing + * enumeration since devices will need to send and receive + * VMBus messages. + */ + list_for_each_entry ( vmdev, &parent->children, dev.siblings ) { + if ( ( rc = vmdev->driver->probe ( vmdev ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not probe: %s\n", + vmdev->dev.name, strerror ( rc ) ); + goto err_probe; + } + } + + return 0; + + err_probe: + /* Remove driver from each device that was already probed */ + list_for_each_entry_continue_reverse ( vmdev, &parent->children, + dev.siblings ) { + vmdev->driver->remove ( vmdev ); + } + err_unexpected_offer: + err_alloc_vmdev: + err_wait_for_message: + /* Free any devices allocated (but potentially not yet probed) */ + list_for_each_entry_safe ( vmdev, tmp, &parent->children, + dev.siblings ) { + list_del ( &vmdev->dev.siblings ); + free ( vmdev ); + } + err_post_message: + return rc; +} + +/** + * Remove channels + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + */ +static void vmbus_remove_channels ( struct hv_hypervisor *hv __unused, + struct device *parent ) { + struct vmbus_device *vmdev; + struct vmbus_device *tmp; + + /* Remove devices */ + list_for_each_entry_safe ( vmdev, tmp, &parent->children, + dev.siblings ) { + vmdev->driver->remove ( vmdev ); + assert ( list_empty ( &vmdev->dev.children ) ); + assert ( vmdev->out == NULL ); + assert ( vmdev->in == NULL ); + assert ( vmdev->packet == NULL ); + assert ( list_empty ( &vmdev->pages ) ); + list_del ( &vmdev->dev.siblings ); + free ( vmdev ); + } +} + +/** + * Probe Hyper-V virtual machine bus + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + * @ret rc Return status code + */ +int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ) { + struct vmbus *vmbus; + int rc; + + /* Allocate and initialise structure */ + vmbus = zalloc ( sizeof ( *vmbus ) ); + if ( ! vmbus ) { + rc = -ENOMEM; + goto err_alloc; + } + hv->vmbus = vmbus; + + /* Initialise message buffer pointer + * + * We use a pointer to the fixed-size Hyper-V received message + * buffer. This allows us to access fields within received + * messages without first checking the message size: any + * fields beyond the end of the message will read as zero. + */ + vmbus->message = ( ( void * ) hv->message->received.data ); + assert ( sizeof ( *vmbus->message ) <= + sizeof ( hv->message->received.data ) ); + + /* Allocate interrupt and monitor pages */ + if ( ( rc = hv_alloc_pages ( hv, &vmbus->intr, &vmbus->monitor_in, + &vmbus->monitor_out, NULL ) ) != 0 ) + goto err_alloc_pages; + + /* Enable message interrupt */ + hv_enable_sint ( hv, VMBUS_MESSAGE_SINT ); + + /* Initiate contact */ + if ( ( rc = vmbus_initiate_contact ( hv ) ) != 0 ) + goto err_initiate_contact; + + /* Enumerate channels */ + if ( ( rc = vmbus_probe_channels ( hv, parent ) ) != 0 ) + goto err_probe_channels; + + return 0; + + vmbus_remove_channels ( hv, parent ); + err_probe_channels: + vmbus_unload ( hv ); + err_initiate_contact: + hv_disable_sint ( hv, VMBUS_MESSAGE_SINT ); + hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out, + NULL ); + err_alloc_pages: + free ( vmbus ); + err_alloc: + return rc; +} + +/** + * Remove Hyper-V virtual machine bus + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + */ +void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent ) { + struct vmbus *vmbus = hv->vmbus; + + vmbus_remove_channels ( hv, parent ); + vmbus_unload ( hv ); + hv_disable_sint ( hv, VMBUS_MESSAGE_SINT ); + hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out, + NULL ); + free ( vmbus ); +} |