summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMichael Brown2014-12-11 18:22:18 +0100
committerMichael Brown2014-12-18 17:27:27 +0100
commitd77a546fb4c69546a81440bdeb0553b8db1d3de8 (patch)
tree8fa8d90c636e3f1302981a4e09804deb0024c5b4 /src
parent[rndis] Add generic RNDIS device abstraction (diff)
downloadipxe-d77a546fb4c69546a81440bdeb0553b8db1d3de8.tar.gz
ipxe-d77a546fb4c69546a81440bdeb0553b8db1d3de8.tar.xz
ipxe-d77a546fb4c69546a81440bdeb0553b8db1d3de8.zip
[hyperv] Add support for Hyper-V hypervisor
Add support for detecting and communicating with the Hyper-V hypervisor. Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src')
-rw-r--r--src/arch/i386/include/bits/hyperv.h72
-rw-r--r--src/arch/x86/Makefile1
-rw-r--r--src/arch/x86/core/pic8259.c (renamed from src/arch/i386/core/pic8259.c)0
-rw-r--r--src/arch/x86/drivers/hyperv/hyperv.c553
-rw-r--r--src/arch/x86/drivers/hyperv/hyperv.h42
-rw-r--r--src/arch/x86/include/bits/errfile.h1
-rw-r--r--src/arch/x86/include/ipxe/cpuid.h3
-rw-r--r--src/arch/x86/include/pic8259.h (renamed from src/arch/i386/include/pic8259.h)3
-rw-r--r--src/arch/x86_64/include/bits/hyperv.h75
-rw-r--r--src/include/ipxe/hyperv.h230
10 files changed, 977 insertions, 3 deletions
diff --git a/src/arch/i386/include/bits/hyperv.h b/src/arch/i386/include/bits/hyperv.h
new file mode 100644
index 00000000..28387e7a
--- /dev/null
+++ b/src/arch/i386/include/bits/hyperv.h
@@ -0,0 +1,72 @@
+#ifndef _BITS_HYPERV_H
+#define _BITS_HYPERV_H
+
+/** @file
+ *
+ * Hyper-V interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdint.h>
+#include <ipxe/io.h>
+
+/**
+ * Issue hypercall via the mapped hypercall page
+ *
+ * @v hv Hyper-V hypervisor
+ * @v code Call code
+ * @v in Input parameters (a literal NULL is passed as address 0)
+ * @v out Output parameters (a literal NULL is passed as address 0)
+ * @ret status Status code (the low 16 bits returned in EAX)
+ */
+static inline __attribute__ (( always_inline )) int
+hv_call ( struct hv_hypervisor *hv, unsigned int code, const void *in,
+ void *out ) {
+ void *hypercall = hv->hypercall; /* Call target (asm operand %9) */
+ uint32_t in_phys;
+ uint32_t out_phys;
+ uint32_t discard_ecx;
+ uint32_t discard_edx;
+ uint16_t result;
+
+ in_phys = ( ( __builtin_constant_p ( in ) && ( in == NULL ) )
+ ? 0 : virt_to_phys ( in ) );
+ out_phys = ( ( __builtin_constant_p ( out ) && ( out == NULL ) )
+ ? 0 : virt_to_phys ( out ) );
+ __asm__ __volatile__ ( "call *%9"
+ : "=a" ( result ), "=c" ( discard_ecx ),
+ "=d" ( discard_edx )
+ : "d" ( 0 ), "a" ( code ), /* EDX:EAX = call code */
+ "b" ( 0 ), "c" ( in_phys ), /* EBX:ECX = input address */
+ "D" ( 0 ), "S" ( out_phys ), /* EDI:ESI = output address */
+ "m" ( hypercall ) );
+ return result;
+}
+
+/**
+ * Set bit atomically
+ *
+ * @v bits Bit field (start address)
+ * @v bit Bit to set (index relative to start of bit field)
+ */
+static inline __attribute__ (( always_inline )) void
+hv_set_bit ( void *bits, unsigned int bit ) {
+ struct {
+ uint32_t dword[ ( bit / 32 ) + 1 ];
+ } *dwords = bits; /* Sized only to describe the asm memory operand */
+
+ /* Set bit using "lock bts". Inform compiler that any memory
+ * from the start of the bit field up to and including the
+ * dword containing this bit may be modified. (This is
+ * overkill but shouldn't matter in practice since we're
+ * unlikely to subsequently read other bits from the same bit
+ * field.)
+ */
+ __asm__ __volatile__ ( "lock bts %1, %0"
+ : "+m" ( *dwords ) : "Ir" ( bit ) );
+}
+
+#endif /* _BITS_HYPERV_H */
diff --git a/src/arch/x86/Makefile b/src/arch/x86/Makefile
index e555587d..a0fee392 100644
--- a/src/arch/x86/Makefile
+++ b/src/arch/x86/Makefile
@@ -9,6 +9,7 @@ SRCDIRS += arch/x86/interface/efi
SRCDIRS += arch/x86/prefix
SRCDIRS += arch/x86/hci/commands
SRCDIRS += arch/x86/drivers/xen
+SRCDIRS += arch/x86/drivers/hyperv
# breaks building some of the linux-related objects
CFLAGS += -Ulinux
diff --git a/src/arch/i386/core/pic8259.c b/src/arch/x86/core/pic8259.c
index 0a9ea2e0..0a9ea2e0 100644
--- a/src/arch/i386/core/pic8259.c
+++ b/src/arch/x86/core/pic8259.c
diff --git a/src/arch/x86/drivers/hyperv/hyperv.c b/src/arch/x86/drivers/hyperv/hyperv.c
new file mode 100644
index 00000000..bd68d051
--- /dev/null
+++ b/src/arch/x86/drivers/hyperv/hyperv.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** @file
+ *
+ * Hyper-V driver
+ *
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <pic8259.h>
+#include <ipxe/malloc.h>
+#include <ipxe/device.h>
+#include <ipxe/cpuid.h>
+#include <ipxe/msr.h>
+#include <ipxe/hyperv.h>
+#include "hyperv.h"
+
+/** Maximum time to wait for a message response (in milliseconds)
+ *
+ * This is a policy decision; hv_wait_for_message() polls once per ms.
+ */
+#define HV_MESSAGE_MAX_WAIT_MS 1000
+
+/**
+ * Convert a Hyper-V status code to an iPXE status code
+ *
+ * @v status Hyper-V status code (as returned by hv_call())
+ * @ret rc iPXE status code (before negation; callers use -EHV())
+ */
+#define EHV( status ) EPLATFORM ( EINFO_EPLATFORM, (status) )
+
+/**
+ * Allocate zeroed pages
+ *
+ * @v hv Hyper-V hypervisor (used only to anchor the argument list)
+ * @v ... Page addresses (void **) to fill in, terminated by NULL
+ * @ret rc Return status code (0, or -ENOMEM after freeing all pages)
+ */
+__attribute__ (( sentinel )) int
+hv_alloc_pages ( struct hv_hypervisor *hv, ... ) {
+ va_list args;
+ void **page;
+ int i;
+
+ /* Allocate and zero pages (each PAGE_SIZE long and PAGE_SIZE aligned) */
+ va_start ( args, hv );
+ for ( i = 0 ; ( ( page = va_arg ( args, void ** ) ) != NULL ); i++ ) {
+ *page = malloc_dma ( PAGE_SIZE, PAGE_SIZE );
+ if ( ! *page )
+ goto err_alloc;
+ memset ( *page, 0, PAGE_SIZE );
+ }
+ va_end ( args );
+
+ return 0;
+
+ err_alloc:
+ va_end ( args );
+ va_start ( args, hv ); /* Restart from the first argument */
+ for ( ; i >= 0 ; i-- ) { /* Entries 0..i, including the failed one */
+ page = va_arg ( args, void ** );
+ free_dma ( *page, PAGE_SIZE ); /* NOTE(review): NULL for failed entry */
+ }
+ va_end ( args );
+ return -ENOMEM;
+}
+
+/**
+ * Release a NULL-terminated list of pages
+ *
+ * @v hv Hyper-V hypervisor (used only to anchor the argument list)
+ * @v ... Page addresses (void *), terminated by NULL
+ */
+__attribute__ (( sentinel )) void
+hv_free_pages ( struct hv_hypervisor *hv, ... ) {
+ va_list ap;
+ void *addr;
+
+ va_start ( ap, hv );
+ for ( addr = va_arg ( ap, void * ) ; addr ; addr = va_arg ( ap, void * ) )
+ free_dma ( addr, PAGE_SIZE );
+ va_end ( ap );
+}
+
+/**
+ * Allocate the shared message buffer
+ *
+ * @v hv Hyper-V hypervisor
+ * @ret rc Return status code (0, or -ENOMEM on allocation failure)
+ */
+static int hv_alloc_message ( struct hv_hypervisor *hv ) {
+ size_t len = sizeof ( *hv->message );
+
+ /* The buffer needs at least 8-byte alignment and must not
+ * straddle a page boundary, so it is aligned on its own
+ * size.
+ */
+ hv->message = malloc_dma ( len, len );
+
+ /* Report allocation failure */
+ return ( hv->message ? 0 : -ENOMEM );
+}
+
+/**
+ * Release the message buffer obtained by hv_alloc_message()
+ *
+ * @v hv Hyper-V hypervisor
+ */
+static void hv_free_message ( struct hv_hypervisor *hv ) {
+ union hv_message_buffer *buffer = hv->message;
+
+ free_dma ( buffer, sizeof ( *buffer ) );
+}
+
+/**
+ * Detect whether the platform is a Hyper-V guest
+ *
+ * @v hv Hyper-V hypervisor
+ * @ret rc Return status code (0 if Hyper-V, otherwise -ENODEV)
+ */
+static int hv_check_hv ( struct hv_hypervisor *hv ) {
+ struct x86_features cpu;
+ uint32_t signature;
+ uint32_t unused_ebx;
+ uint32_t unused_ecx;
+ uint32_t unused_edx;
+
+ /* The CPUID "hypervisor present" bit says only that some
+ * hypervisor exists; it need not be Hyper-V.
+ */
+ x86_features ( &cpu );
+ if ( ( cpu.intel.ecx & CPUID_FEATURES_INTEL_ECX_HYPERVISOR ) == 0 ) {
+ DBGC ( hv, "HV %p not running in a hypervisor\n", hv );
+ return -ENODEV;
+ }
+
+ /* Hyper-V identifies itself through the interface ID leaf */
+ cpuid ( HV_CPUID_INTERFACE_ID, &signature, &unused_ebx,
+ &unused_ecx, &unused_edx );
+ if ( signature == HV_INTERFACE_ID )
+ return 0;
+ DBGC ( hv, "HV %p not running in Hyper-V (interface ID "
+ "%#08x)\n", hv, signature );
+ return -ENODEV;
+}
+
+/**
+ * Register guest OS identity and map hypercall page
+ *
+ * @v hv Hyper-V hypervisor
+ * @ret rc Return status code (-EBUSY if a guest OS ID is already set)
+ */
+static int hv_map_hypercall ( struct hv_hypervisor *hv ) {
+ union {
+ struct {
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+ } __attribute__ (( packed ));
+ char text[ 13 /* "bbbbccccdddd" + NUL */ ];
+ } vendor_id;
+ uint32_t build;
+ uint32_t version;
+ uint32_t discard_eax;
+ uint32_t discard_ecx;
+ uint32_t discard_edx;
+ uint64_t guest_os_id;
+ uint64_t hypercall;
+
+ /* Report guest OS identity (refuse if one is already registered) */
+ guest_os_id = rdmsr ( HV_X64_MSR_GUEST_OS_ID );
+ if ( guest_os_id != 0 ) {
+ DBGC ( hv, "HV %p guest OS ID MSR already set to %#08llx\n",
+ hv, guest_os_id );
+ return -EBUSY;
+ }
+ guest_os_id = HV_GUEST_OS_ID_IPXE;
+ DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
+ wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
+
+ /* Get hypervisor system identity (for debugging only) */
+ cpuid ( HV_CPUID_VENDOR_ID, &discard_eax, &vendor_id.ebx,
+ &vendor_id.ecx, &vendor_id.edx );
+ vendor_id.text[ sizeof ( vendor_id.text ) - 1 ] = '\0';
+ cpuid ( HV_CPUID_HYPERVISOR_ID, &build, &version, &discard_ecx,
+ &discard_edx );
+ DBGC ( hv, "HV %p detected \"%s\" version %d.%d build %d\n", hv,
+ vendor_id.text, ( version >> 16 ), ( version & 0xffff ), build );
+
+ /* Map hypercall page: keep sub-page MSR bits, set address and enable */
+ hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
+ hypercall &= ( PAGE_SIZE - 1 );
+ hypercall |= ( virt_to_phys ( hv->hypercall ) | HV_HYPERCALL_ENABLE );
+ DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
+ wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
+
+ return 0;
+}
+
+/**
+ * Unmap hypercall page and reset guest OS identity
+ *
+ * @v hv Hyper-V hypervisor
+ */
+static void hv_unmap_hypercall ( struct hv_hypervisor *hv ) {
+ uint64_t hypercall;
+ uint64_t guest_os_id;
+
+ /* Unmap the hypercall page (clear page address and enable bit) */
+ hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
+ hypercall &= ( ( PAGE_SIZE - 1 ) & ~HV_HYPERCALL_ENABLE );
+ DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
+ wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
+
+ /* Reset the guest OS identity (done after unmapping the page) */
+ guest_os_id = 0;
+ DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
+ wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
+}
+
+/**
+ * Map synthetic interrupt controller (SynIC)
+ *
+ * @v hv Hyper-V hypervisor
+ * @ret rc Return status code (currently always 0)
+ */
+static int hv_map_synic ( struct hv_hypervisor *hv ) {
+ uint64_t simp;
+ uint64_t siefp;
+ uint64_t scontrol;
+
+ /* Map SynIC message page (keep sub-page bits, set address and enable) */
+ simp = rdmsr ( HV_X64_MSR_SIMP );
+ simp &= ( PAGE_SIZE - 1 );
+ simp |= ( virt_to_phys ( hv->synic.message ) | HV_SIMP_ENABLE );
+ DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
+ wrmsr ( HV_X64_MSR_SIMP, simp );
+
+ /* Map SynIC event page (keep sub-page bits, set address and enable) */
+ siefp = rdmsr ( HV_X64_MSR_SIEFP );
+ siefp &= ( PAGE_SIZE - 1 );
+ siefp |= ( virt_to_phys ( hv->synic.event ) | HV_SIEFP_ENABLE );
+ DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
+ wrmsr ( HV_X64_MSR_SIEFP, siefp );
+
+ /* Enable SynIC (only after both pages have been mapped) */
+ scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
+ scontrol |= HV_SCONTROL_ENABLE;
+ DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
+ wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
+
+ return 0;
+}
+
+/**
+ * Unmap synthetic interrupt controller (reverse of hv_map_synic())
+ *
+ * @v hv Hyper-V hypervisor
+ */
+static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
+ uint64_t scontrol;
+ uint64_t siefp;
+ uint64_t simp;
+
+ /* Disable SynIC (before unmapping either page) */
+ scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
+ scontrol &= ~HV_SCONTROL_ENABLE;
+ DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
+ wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
+
+ /* Unmap SynIC event page (clear page address and enable bit) */
+ siefp = rdmsr ( HV_X64_MSR_SIEFP );
+ siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
+ DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
+ wrmsr ( HV_X64_MSR_SIEFP, siefp );
+
+ /* Unmap SynIC message page (clear page address and enable bit) */
+ simp = rdmsr ( HV_X64_MSR_SIMP );
+ simp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIMP_ENABLE );
+ DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
+ wrmsr ( HV_X64_MSR_SIMP, simp );
+}
+
+/**
+ * Enable synthetic interrupt (unmasked, AutoEOI, IRQ13 vector)
+ *
+ * @v hv Hyper-V hypervisor
+ * @v sintx Synthetic interrupt number (selects the SINTx MSR)
+ */
+void hv_enable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
+ unsigned long msr = HV_X64_MSR_SINT ( sintx );
+ uint64_t sint;
+
+ /* Enable synthetic interrupt
+ *
+ * We have to enable the interrupt, otherwise messages will
+ * not be delivered (even though the documentation implies
+ * that polling for messages is possible). We enable AutoEOI
+ * and hook the interrupt to the obsolete IRQ13 (FPU
+ * exception) vector, which will be implemented as a no-op.
+ */
+ sint = rdmsr ( msr );
+ sint &= ~( HV_SINT_MASKED | HV_SINT_VECTOR_MASK ); /* Unmask, drop old vector */
+ sint |= ( HV_SINT_AUTO_EOI |
+ HV_SINT_VECTOR ( IRQ_INT ( 13 /* See comment above */ ) ) );
+ DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
+ wrmsr ( msr, sint );
+}
+
+/**
+ * Disable synthetic interrupt
+ *
+ * @v hv Hyper-V hypervisor
+ * @v sintx Synthetic interrupt number (selects the SINTx MSR)
+ */
+void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
+ unsigned long msr = HV_X64_MSR_SINT ( sintx );
+ uint64_t sint;
+
+ /* Disable synthetic interrupt (clear AutoEOI, set mask bit) */
+ sint = rdmsr ( msr );
+ sint &= ~HV_SINT_AUTO_EOI;
+ sint |= HV_SINT_MASKED;
+ DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
+ wrmsr ( msr, sint );
+}
+
+/**
+ * Post a message to a connection via the HV_POST_MESSAGE hypercall
+ *
+ * @v hv Hyper-V hypervisor
+ * @v id Connection ID
+ * @v type Message type
+ * @v data Message payload
+ * @v len Length of payload (must fit within the data field)
+ * @ret rc Return status code
+ */
+int hv_post_message ( struct hv_hypervisor *hv, unsigned int id,
+ unsigned int type, const void *data, size_t len ) {
+ struct hv_post_message *post = &hv->message->posted;
+ int status;
+ int rc;
+
+ /* Payload must fit within the fixed-size data field */
+ assert ( len <= sizeof ( post->data ) );
+
+ /* Build hypercall input in the shared message buffer */
+ memset ( post, 0, sizeof ( *post ) );
+ memcpy ( post->data, data, len );
+ post->len = cpu_to_le32 ( len );
+ post->type = cpu_to_le32 ( type );
+ post->id = cpu_to_le32 ( id );
+ DBGC2 ( hv, "HV %p connection %d posting message type %#08x:\n",
+ hv, id, type );
+ DBGC2_HDA ( hv, 0, post->data, len );
+
+ /* Issue hypercall; a zero status indicates success */
+ status = hv_call ( hv, HV_POST_MESSAGE, post, NULL );
+ if ( status == 0 )
+ return 0;
+
+ rc = -EHV ( status );
+ DBGC ( hv, "HV %p could not post message to %#08x: %s\n",
+ hv, id, strerror ( rc ) );
+ return rc;
+}
+
+/**
+ * Wait for a message to arrive in a SynIC message slot
+ *
+ * @v hv Hyper-V hypervisor
+ * @v sintx Synthetic interrupt number (selects the message slot)
+ * @ret rc Return status code (0 with message copied, or -ETIMEDOUT)
+ */
+int hv_wait_for_message ( struct hv_hypervisor *hv, unsigned int sintx ) {
+ struct hv_message *msg = &hv->message->received;
+ struct hv_message *src = &hv->synic.message[sintx];
+ unsigned int retries;
+ size_t len;
+
+ /* Poll roughly once per millisecond for a message to arrive */
+ for ( retries = 0 ; retries < HV_MESSAGE_MAX_WAIT_MS ; retries++ ) {
+
+ /* A non-zero type marks the slot as holding a message */
+ if ( src->type ) {
+
+ /* Copy header plus payload; payload must fit in data[] */
+ memset ( msg, 0, sizeof ( *msg ) );
+ len = src->len;
+ assert ( len <= sizeof ( msg->data ) );
+ memcpy ( msg, src,
+ ( offsetof ( typeof ( *msg ), data ) + len ) );
+ DBGC2 ( hv, "HV %p SINT%d received message type "
+ "%#08x:\n", hv, sintx,
+ le32_to_cpu ( msg->type ) );
+ DBGC2_HDA ( hv, 0, msg->data, len );
+
+ /* Consume message (mark the slot as empty again) */
+ src->type = 0;
+
+ return 0;
+ }
+
+ /* Trigger message delivery */
+ wrmsr ( HV_X64_MSR_EOM, 0 );
+
+ /* Delay */
+ mdelay ( 1 );
+ }
+
+ DBGC ( hv, "HV %p SINT%d timed out waiting for message\n",
+ hv, sintx );
+ return -ETIMEDOUT;
+}
+
+/**
+ * Signal an event flag via the HV_SIGNAL_EVENT hypercall
+ *
+ * @v hv Hyper-V hypervisor
+ * @v id Connection ID
+ * @v flag Flag number
+ * @ret rc Return status code
+ */
+int hv_signal_event ( struct hv_hypervisor *hv, unsigned int id,
+ unsigned int flag ) {
+ struct hv_signal_event *params = &hv->message->signalled;
+ int status;
+ int rc;
+
+ /* Fill in hypercall input, with the reserved field zeroed */
+ memset ( params, 0, sizeof ( *params ) );
+ params->flag = cpu_to_le16 ( flag );
+ params->id = cpu_to_le32 ( id );
+
+ /* Issue hypercall; a zero status indicates success */
+ status = hv_call ( hv, HV_SIGNAL_EVENT, params, NULL );
+ if ( status == 0 )
+ return 0;
+
+ rc = -EHV ( status );
+ DBGC ( hv, "HV %p could not signal event to %#08x: %s\n",
+ hv, id, strerror ( rc ) );
+ return rc;
+}
+
+/**
+ * Probe root device (detect Hyper-V and set up the interface)
+ *
+ * @v rootdev Root device
+ * @ret rc Return status code (0, or the first failing step's error)
+ */
+static int hv_probe ( struct root_device *rootdev ) {
+ struct hv_hypervisor *hv;
+ int rc;
+
+ /* Allocate and initialise structure */
+ hv = zalloc ( sizeof ( *hv ) );
+ if ( ! hv ) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
+
+ /* Check we are running in Hyper-V */
+ if ( ( rc = hv_check_hv ( hv ) ) != 0 )
+ goto err_check_hv;
+
+ /* Allocate pages (hypercall page, SynIC message and event pages) */
+ if ( ( rc = hv_alloc_pages ( hv, &hv->hypercall, &hv->synic.message,
+ &hv->synic.event, NULL ) ) != 0 )
+ goto err_alloc_pages;
+
+ /* Allocate message buffer */
+ if ( ( rc = hv_alloc_message ( hv ) ) != 0 )
+ goto err_alloc_message;
+
+ /* Map hypercall page */
+ if ( ( rc = hv_map_hypercall ( hv ) ) != 0 )
+ goto err_map_hypercall;
+
+ /* Map synthetic interrupt controller */
+ if ( ( rc = hv_map_synic ( hv ) ) != 0 )
+ goto err_map_synic;
+
+ rootdev_set_drvdata ( rootdev, hv ); /* Read back by hv_remove() */
+ return 0;
+
+ hv_unmap_synic ( hv ); /* Not reached; NOTE(review): presumably shows unwind order */
+ err_map_synic:
+ hv_unmap_hypercall ( hv );
+ err_map_hypercall:
+ hv_free_message ( hv );
+ err_alloc_message:
+ hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
+ NULL );
+ err_alloc_pages: /* hv_alloc_pages() frees its own pages on failure */
+ err_check_hv:
+ free ( hv );
+ err_alloc:
+ return rc;
+}
+
+/**
+ * Remove root device (tear down in reverse order of hv_probe())
+ *
+ * @v rootdev Root device (driver data was set by hv_probe())
+ */
+static void hv_remove ( struct root_device *rootdev ) {
+ struct hv_hypervisor *hv = rootdev_get_drvdata ( rootdev );
+
+ hv_unmap_synic ( hv ); /* Disable SynIC before releasing its pages */
+ hv_unmap_hypercall ( hv ); /* Also resets the guest OS identity */
+ hv_free_message ( hv );
+ hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
+ NULL );
+ free ( hv );
+}
+
+/** Hyper-V root device driver (probe and remove entry points) */
+static struct root_driver hv_root_driver = {
+ .probe = hv_probe,
+ .remove = hv_remove,
+};
+
+/** Hyper-V root device (bound to hv_root_driver) */
+struct root_device hv_root_device __root_device = {
+ .dev = { .name = "Hyper-V" },
+ .driver = &hv_root_driver,
+};
diff --git a/src/arch/x86/drivers/hyperv/hyperv.h b/src/arch/x86/drivers/hyperv/hyperv.h
new file mode 100644
index 00000000..94bdb8a8
--- /dev/null
+++ b/src/arch/x86/drivers/hyperv/hyperv.h
@@ -0,0 +1,42 @@
+#ifndef _HYPERV_H
+#define _HYPERV_H
+
+/** @file
+ *
+ * Hyper-V driver
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/** CPUID leaf: get vendor identification (string in EBX, ECX, EDX) */
+#define HV_CPUID_VENDOR_ID 0x40000000UL
+
+/** CPUID leaf: get interface identification (signature in EAX) */
+#define HV_CPUID_INTERFACE_ID 0x40000001UL
+
+/** CPUID leaf: get hypervisor identification (build and version) */
+#define HV_CPUID_HYPERVISOR_ID 0x40000002UL
+
+/** Guest OS identity MSR */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000UL
+
+/** Hypercall page MSR */
+#define HV_X64_MSR_HYPERCALL 0x40000001UL
+
+/** SynIC control MSR */
+#define HV_X64_MSR_SCONTROL 0x40000080UL
+
+/** SynIC event flags page MSR */
+#define HV_X64_MSR_SIEFP 0x40000082UL
+
+/** SynIC message page MSR */
+#define HV_X64_MSR_SIMP 0x40000083UL
+
+/** SynIC end of message MSR (written to trigger message delivery) */
+#define HV_X64_MSR_EOM 0x40000084UL
+
+/** SynIC interrupt source MSRs (x is the synthetic interrupt number) */
+#define HV_X64_MSR_SINT(x) ( 0x40000090UL + (x) )
+
+#endif /* _HYPERV_H */
diff --git a/src/arch/x86/include/bits/errfile.h b/src/arch/x86/include/bits/errfile.h
index 62457562..5fce86af 100644
--- a/src/arch/x86/include/bits/errfile.h
+++ b/src/arch/x86/include/bits/errfile.h
@@ -46,6 +46,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
#define ERRFILE_timer_rdtsc ( ERRFILE_ARCH | ERRFILE_DRIVER | 0x00000000 )
#define ERRFILE_timer_bios ( ERRFILE_ARCH | ERRFILE_DRIVER | 0x00010000 )
#define ERRFILE_hvm ( ERRFILE_ARCH | ERRFILE_DRIVER | 0x00020000 )
+#define ERRFILE_hyperv ( ERRFILE_ARCH | ERRFILE_DRIVER | 0x00030000 )
#define ERRFILE_cpuid_cmd ( ERRFILE_ARCH | ERRFILE_OTHER | 0x00000000 )
#define ERRFILE_cpuid_settings ( ERRFILE_ARCH | ERRFILE_OTHER | 0x00010000 )
diff --git a/src/arch/x86/include/ipxe/cpuid.h b/src/arch/x86/include/ipxe/cpuid.h
index 2f78dfca..fa3a4bc8 100644
--- a/src/arch/x86/include/ipxe/cpuid.h
+++ b/src/arch/x86/include/ipxe/cpuid.h
@@ -39,6 +39,9 @@ struct x86_features {
/** Get standard features */
#define CPUID_FEATURES 0x00000001UL
+/** Hypervisor is present */
+#define CPUID_FEATURES_INTEL_ECX_HYPERVISOR 0x80000000UL
+
/** Get largest extended function */
#define CPUID_AMD_MAX_FN 0x80000000UL
diff --git a/src/arch/i386/include/pic8259.h b/src/arch/x86/include/pic8259.h
index a07e97d3..0d02266d 100644
--- a/src/arch/i386/include/pic8259.h
+++ b/src/arch/x86/include/pic8259.h
@@ -11,9 +11,6 @@ FILE_LICENCE ( GPL2_OR_LATER );
#include <ipxe/io.h>
-/* For segoff_t */
-#include "realmode.h"
-
#define IRQ_PIC_CUTOFF 8
/* 8259 register locations */
diff --git a/src/arch/x86_64/include/bits/hyperv.h b/src/arch/x86_64/include/bits/hyperv.h
new file mode 100644
index 00000000..88a3d559
--- /dev/null
+++ b/src/arch/x86_64/include/bits/hyperv.h
@@ -0,0 +1,75 @@
+#ifndef _BITS_HYPERV_H
+#define _BITS_HYPERV_H
+
+/** @file
+ *
+ * Hyper-V interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stddef.h>
+#include <stdint.h>
+#include <ipxe/io.h>
+
+/**
+ * Issue hypercall via the mapped hypercall page
+ *
+ * @v hv Hyper-V hypervisor
+ * @v code Call code
+ * @v in Input parameters (a literal NULL is passed as address 0)
+ * @v out Output parameters (a literal NULL is passed as address 0)
+ * @ret status Status code (the low 16 bits returned in RAX)
+ */
+static inline __attribute__ (( always_inline )) int
+hv_call ( struct hv_hypervisor *hv, unsigned int code, const void *in,
+ void *out ) {
+ void *hypercall = hv->hypercall; /* Call target (asm operand %4) */
+ register uint64_t rcx asm ( "rcx" );
+ register uint64_t rdx asm ( "rdx" );
+ register uint64_t r8 asm ( "r8" );
+ uint64_t in_phys;
+ uint64_t out_phys;
+ uint16_t result;
+
+ in_phys = ( ( __builtin_constant_p ( in ) && ( in == NULL ) )
+ ? 0 : virt_to_phys ( in ) );
+ out_phys = ( ( __builtin_constant_p ( out ) && ( out == NULL ) )
+ ? 0 : virt_to_phys ( out ) );
+ rcx = code; /* RCX = call code */
+ rdx = in_phys; /* RDX = input physical address */
+ r8 = out_phys; /* R8 = output physical address */
+ __asm__ __volatile__ ( "call *%4"
+ : "=a" ( result ), "+r" ( rcx ), "+r" ( rdx ),
+ "+r" ( r8 )
+ : "m" ( hypercall )
+ : "r9", "r10", "r11", "xmm0", "xmm1", "xmm2",
+ "xmm3", "xmm4", "xmm5" );
+ return result;
+}
+
+/**
+ * Set bit atomically
+ *
+ * @v bits Bit field (start address)
+ * @v bit Bit to set (index relative to start of bit field)
+ */
+static inline __attribute__ (( always_inline )) void
+hv_set_bit ( void *bits, unsigned int bit ) {
+ struct {
+ uint64_t qword[ ( bit / 64 ) + 1 ];
+ } *qwords = bits; /* Sized only to describe the asm memory operand */
+
+ /* Set bit using "lock bts". Inform compiler that any memory
+ * from the start of the bit field up to and including the
+ * qword containing this bit may be modified. (This is
+ * overkill but shouldn't matter in practice since we're
+ * unlikely to subsequently read other bits from the same bit
+ * field.)
+ */
+ __asm__ __volatile__ ( "lock bts %1, %0"
+ : "+m" ( *qwords ) : "Ir" ( bit ) );
+}
+
+#endif /* _BITS_HYPERV_H */
diff --git a/src/include/ipxe/hyperv.h b/src/include/ipxe/hyperv.h
new file mode 100644
index 00000000..10be1ca8
--- /dev/null
+++ b/src/include/ipxe/hyperv.h
@@ -0,0 +1,230 @@
+#ifndef _IPXE_HYPERV_H
+#define _IPXE_HYPERV_H
+
+/** @file
+ *
+ * Hyper-V interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <ipxe/io.h>
+
+/** Hyper-V interface identification (compared against CPUID EAX) */
+#define HV_INTERFACE_ID 0x31237648 /* "Hv#1" */
+
+/** Guest OS identity for iPXE
+ *
+ * This field comprises:
+ *
+ * Bit 63 : set to 1 to indicate an open source OS
+ * Bits 62:56 : OS Type
+ * Bits 55:48 : OS ID
+ * Bits 47:16 : Version
+ * Bits 15:0 : Build number
+ *
+ * There appears to be no central registry for the "OS Type". The
+ * specification states that "Linux is 0x100", and the FreeBSD source
+ * states that "FreeBSD is 0x200". Both of these statements are
+ * actually referring to the combined "OS Type" and "OS ID" field.
+ *
+ * We choose to use 0x98ae for bits 63:48: this is generated by setting
+ * bit 63 (to indicate an open source OS) and setting the OS Type+ID
+ * equal to the PnP vendor ID (0x18ae) used in romprefix.S. No version
+ * information or build number is included.
+ */
+#define HV_GUEST_OS_ID_IPXE ( ( 1ULL << 63 ) | ( 0x18aeULL << 48 ) )
+
+/** Enable hypercall page (bit 0 of the hypercall MSR value) */
+#define HV_HYPERCALL_ENABLE 0x00000001UL
+
+/** Enable SynIC (bit 0 of the SCONTROL MSR value) */
+#define HV_SCONTROL_ENABLE 0x00000001UL
+
+/** Enable SynIC event flags (bit 0 of the SIEFP MSR value) */
+#define HV_SIEFP_ENABLE 0x00000001UL
+
+/** Enable SynIC messages (bit 0 of the SIMP MSR value) */
+#define HV_SIMP_ENABLE 0x00000001UL
+
+/** Perform implicit EOI upon synthetic interrupt delivery (bit 17) */
+#define HV_SINT_AUTO_EOI 0x00020000UL
+
+/** Mask synthetic interrupt (bit 16) */
+#define HV_SINT_MASKED 0x00010000UL
+
+/** Synthetic interrupt vector (occupies the lowest bits) */
+#define HV_SINT_VECTOR(x) ( (x) << 0 )
+
+/** Synthetic interrupt vector mask (bits 7:0) */
+#define HV_SINT_VECTOR_MASK HV_SINT_VECTOR ( 0xff )
+
+/** Post message (hypercall code) */
+#define HV_POST_MESSAGE 0x005c
+
+/** A posted message
+ *
+ * Input parameter list for the HvPostMessage hypercall (256 bytes).
+ */
+struct hv_post_message {
+ /** Connection ID (little-endian) */
+ uint32_t id;
+ /** Padding */
+ uint32_t reserved;
+ /** Type (little-endian) */
+ uint32_t type;
+ /** Length of message (bytes used in data, little-endian) */
+ uint32_t len;
+ /** Message (at most 240 bytes) */
+ uint8_t data[240];
+} __attribute__ (( packed ));
+
+/** A received message
+ *
+ * This is the HV_MESSAGE structure from the Hypervisor Top-Level
+ * Functional Specification. The field order given in the
+ * documentation is incorrect. Packed size is 256 bytes.
+ */
+struct hv_message {
+ /** Type (zero is treated as "no message" by hv_wait_for_message()) */
+ uint32_t type;
+ /** Length of message (bytes used in data) */
+ uint8_t len;
+ /** Flags */
+ uint8_t flags;
+ /** Padding */
+ uint16_t reserved;
+ /** Origin */
+ uint64_t origin;
+ /** Message (at most 240 bytes, following the 16-byte header) */
+ uint8_t data[240];
+} __attribute__ (( packed ));
+
+/** Signal event (hypercall code) */
+#define HV_SIGNAL_EVENT 0x005d
+
+/** A signalled event (input to the HV_SIGNAL_EVENT hypercall) */
+struct hv_signal_event {
+ /** Connection ID (little-endian) */
+ uint32_t id;
+ /** Flag number (little-endian) */
+ uint16_t flag;
+ /** Reserved (zeroed by hv_signal_event()) */
+ uint16_t reserved;
+} __attribute__ (( packed ));
+
+/** A received event (element type of the SynIC event flag page) */
+struct hv_event {
+ /** Event flags (256 bytes, i.e. 2048 flag bits) */
+ uint8_t flags[256];
+} __attribute__ (( packed ));
+
+/** A monitor trigger group
+ *
+ * This is the HV_MONITOR_TRIGGER_GROUP structure from the Hypervisor
+ * Top-Level Functional Specification. Packed size is 8 bytes.
+ */
+struct hv_monitor_trigger {
+ /** Pending events (32-bit field) */
+ uint32_t pending;
+ /** Armed events (32-bit field) */
+ uint32_t armed;
+} __attribute__ (( packed ));
+
+/** A monitor parameter set
+ *
+ * This is the HV_MONITOR_PARAMETER structure from the Hypervisor
+ * Top-Level Functional Specification. Packed size is 8 bytes.
+ */
+struct hv_monitor_parameter {
+ /** Connection ID */
+ uint32_t id;
+ /** Flag number */
+ uint16_t flag;
+ /** Reserved */
+ uint16_t reserved;
+} __attribute__ (( packed ));
+
+/** A monitor page
+ *
+ * This is the HV_MONITOR_PAGE structure from the Hypervisor Top-Level
+ * Functional Specification. The packed fields total 4096 bytes.
+ */
+struct hv_monitor {
+ /** Flags */
+ uint32_t flags;
+ /** Reserved */
+ uint8_t reserved_a[4];
+ /** Trigger groups (starting at byte offset 8) */
+ struct hv_monitor_trigger trigger[4];
+ /** Reserved */
+ uint8_t reserved_b[536];
+ /** Latencies (one 16-bit value per [4][32] entry) */
+ uint16_t latency[4][32];
+ /** Reserved */
+ uint8_t reserved_c[256];
+ /** Parameters (one per [4][32] entry, as for latency) */
+ struct hv_monitor_parameter param[4][32];
+ /** Reserved */
+ uint8_t reserved_d[1984];
+} __attribute__ (( packed ));
+
+/** A synthetic interrupt controller */
+struct hv_synic {
+ /** Message page (indexed by synthetic interrupt number) */
+ struct hv_message *message;
+ /** Event flag page */
+ struct hv_event *event;
+};
+
+/** A message buffer (a union: all three members share storage) */
+union hv_message_buffer {
+ /** Posted message (built by hv_post_message()) */
+ struct hv_post_message posted;
+ /** Received message (copied in by hv_wait_for_message()) */
+ struct hv_message received;
+ /** Signalled event (built by hv_signal_event()) */
+ struct hv_signal_event signalled;
+};
+
+/** A Hyper-V hypervisor */
+struct hv_hypervisor {
+ /** Hypercall page (call target used by hv_call()) */
+ void *hypercall;
+ /** Synthetic interrupt controller (SynIC) */
+ struct hv_synic synic;
+ /** Message buffer (shared by post, wait and signal operations) */
+ union hv_message_buffer *message;
+};
+
+#include <bits/hyperv.h>
+
+/**
+ * Count the pages spanned by an address range
+ *
+ * @v data Address of the first byte of data
+ * @v len Length of data in bytes (must be non-zero)
+ * @ret pfn_count Number of pages spanned, counting partial pages
+ */
+static inline unsigned int hv_pfn_count ( physaddr_t data, size_t len ) {
+ unsigned int pfn_lo = ( data / PAGE_SIZE );
+ unsigned int pfn_hi = ( ( data + len - 1 ) / PAGE_SIZE );
+
+ return ( 1 + pfn_hi - pfn_lo );
+}
+
+extern __attribute__ (( sentinel )) int
+hv_alloc_pages ( struct hv_hypervisor *hv, ... ); /* void ** list, NULL-terminated */
+extern __attribute__ (( sentinel )) void
+hv_free_pages ( struct hv_hypervisor *hv, ... ); /* void * list, NULL-terminated */
+extern void hv_enable_sint ( struct hv_hypervisor *hv, unsigned int sintx );
+extern void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx );
+extern int hv_post_message ( struct hv_hypervisor *hv, unsigned int id,
+ unsigned int type, const void *data, size_t len );
+extern int hv_wait_for_message ( struct hv_hypervisor *hv, unsigned int sintx );
+extern int hv_signal_event ( struct hv_hypervisor *hv, unsigned int id,
+ unsigned int flag );
+
+#endif /* _IPXE_HYPERV_H */