summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/arch/x86/drivers/hyperv/hyperv.c 135
-rw-r--r-- src/drivers/net/netvsc.c 47
-rw-r--r-- src/drivers/net/netvsc.h 15
-rw-r--r-- src/include/ipxe/hyperv.h 3
-rw-r--r-- src/include/ipxe/vmbus.h 26
-rw-r--r-- src/interface/hyperv/vmbus.c 133
6 files changed, 345 insertions, 14 deletions
diff --git a/src/arch/x86/drivers/hyperv/hyperv.c b/src/arch/x86/drivers/hyperv/hyperv.c
index b90937df..98c2b30c 100644
--- a/src/arch/x86/drivers/hyperv/hyperv.c
+++ b/src/arch/x86/drivers/hyperv/hyperv.c
@@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <ipxe/malloc.h>
#include <ipxe/device.h>
#include <ipxe/timer.h>
+#include <ipxe/quiesce.h>
#include <ipxe/cpuid.h>
#include <ipxe/msr.h>
#include <ipxe/hyperv.h>
@@ -299,6 +300,10 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
uint64_t siefp;
uint64_t scontrol;
+ /* Zero SynIC message and event pages */
+ memset ( hv->synic.message, 0, PAGE_SIZE );
+ memset ( hv->synic.event, 0, PAGE_SIZE );
+
/* Map SynIC message page */
simp = rdmsr ( HV_X64_MSR_SIMP );
simp &= ( PAGE_SIZE - 1 );
@@ -321,21 +326,14 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) {
}
/**
- * Unmap synthetic interrupt controller
+ * Unmap synthetic interrupt controller, leaving SCONTROL untouched
*
* @v hv Hyper-V hypervisor
*/
-static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
- uint64_t scontrol;
+static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) {
uint64_t siefp;
uint64_t simp;
- /* Disable SynIC */
- scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
- scontrol &= ~HV_SCONTROL_ENABLE;
- DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
- wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
-
/* Unmap SynIC event page */
siefp = rdmsr ( HV_X64_MSR_SIEFP );
siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
@@ -350,6 +348,24 @@ static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
}
/**
+ * Unmap synthetic interrupt controller
+ *
+ * @v hv Hyper-V hypervisor
+ *
+ * Clears the enable bit in the SCONTROL MSR and then calls
+ * hv_unmap_synic_no_scontrol() to unmap the SynIC event and message
+ * pages.
+ */
+static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
+ uint64_t scontrol;
+
+ /* Disable SynIC */
+ scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
+ scontrol &= ~HV_SCONTROL_ENABLE;
+ DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
+ wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
+
+ /* Unmap SynIC event and message pages */
+ hv_unmap_synic_no_scontrol ( hv );
+}
+
+/**
* Enable synthetic interrupt
*
* @v hv Hyper-V hypervisor
@@ -385,8 +401,12 @@ void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
unsigned long msr = HV_X64_MSR_SINT ( sintx );
uint64_t sint;
- /* Disable synthetic interrupt */
+ /* Do nothing if interrupt is already disabled */
sint = rdmsr ( msr );
+ if ( sint & HV_SINT_MASKED )
+ return;
+
+ /* Disable synthetic interrupt */
sint &= ~HV_SINT_AUTO_EOI;
sint |= HV_SINT_MASKED;
DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
@@ -589,6 +609,7 @@ static void hv_remove ( struct root_device *rootdev ) {
hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
NULL );
free ( hv );
+ rootdev_set_drvdata ( rootdev, NULL );
}
/** Hyper-V root device driver */
@@ -604,6 +625,100 @@ struct root_device hv_root_device __root_device = {
};
/**
+ * Quiesce system
+ *
+ * Does nothing if the Hyper-V root device has no hypervisor driver
+ * data. Otherwise masks every synthetic interrupt from 0 to
+ * HV_SINT_MAX inclusive, unmaps the SynIC pages without touching
+ * SCONTROL, and unmaps the hypercall page.
+ */
+static void hv_quiesce ( void ) {
+ struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
+ unsigned int i;
+
+ /* Do nothing if we are not running in Hyper-V */
+ if ( ! hv )
+ return;
+
+ /* The "enlightened" portions of the Windows Server 2016 boot
+ * process will not cleanly take ownership of an active
+ * Hyper-V connection. Experimentation shows that the minimum
+ * requirement is that we disable the SynIC message page
+ * (i.e. zero the SIMP MSR).
+ *
+ * We cannot perform a full shutdown of the Hyper-V
+ * connection. Experimentation shows that if we disable the
+ * SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016
+ * will enter an indefinite wait loop.
+ *
+ * Attempt to create a safe handover environment by resetting
+ * all MSRs except for SCONTROL.
+ *
+ * Note that we do not shut down our VMBus devices, since we
+ * may need to unquiesce the system and continue operation.
+ */
+
+ /* Disable all synthetic interrupts */
+ for ( i = 0 ; i <= HV_SINT_MAX ; i++ )
+ hv_disable_sint ( hv, i );
+
+ /* Unmap synthetic interrupt controller, leaving SCONTROL
+ * enabled (see above).
+ */
+ hv_unmap_synic_no_scontrol ( hv );
+
+ /* Unmap hypercall page */
+ hv_unmap_hypercall ( hv );
+
+ DBGC ( hv, "HV %p quiesced\n", hv );
+}
+
+/**
+ * Unquiesce system
+ *
+ * Does nothing if the Hyper-V root device has no hypervisor driver
+ * data, or if the SIMP MSR still has its enable bit set. Otherwise
+ * remaps the hypercall page and the SynIC, then resets the VMBus
+ * devices via vmbus_reset(). A reset failure is only logged.
+ */
+static void hv_unquiesce ( void ) {
+ struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
+ uint64_t simp;
+ int rc;
+
+ /* Do nothing if we are not running in Hyper-V */
+ if ( ! hv )
+ return;
+
+ /* Experimentation shows that the "enlightened" portions of
+ * Windows Server 2016 will break our Hyper-V connection at
+ * some point during a SAN boot. Surprisingly it does not
+ * change the guest OS ID MSR, but it does leave the SynIC
+ * message page disabled.
+ *
+ * Our own explicit quiescing procedure will also disable the
+ * SynIC message page. We can therefore use the SynIC message
+ * page enable bit as a heuristic to determine when we need to
+ * reestablish our Hyper-V connection.
+ */
+ simp = rdmsr ( HV_X64_MSR_SIMP );
+ if ( simp & HV_SIMP_ENABLE )
+ return;
+
+ /* Remap hypercall page */
+ hv_map_hypercall ( hv );
+
+ /* Remap synthetic interrupt controller */
+ hv_map_synic ( hv );
+
+ /* Reset Hyper-V devices */
+ if ( ( rc = vmbus_reset ( hv, &hv_root_device.dev ) ) != 0 ) {
+ DBGC ( hv, "HV %p could not unquiesce: %s\n",
+ hv, strerror ( rc ) );
+ /* Nothing we can do */
+ return;
+ }
+}
+
+/** Hyper-V quiescer
+ *
+ * Pairs hv_quiesce() with hv_unquiesce().
+ */
+struct quiescer hv_quiescer __quiescer = {
+ .quiesce = hv_quiesce,
+ .unquiesce = hv_unquiesce,
+};
+
+/**
* Probe timer
*
* @ret rc Return status code
diff --git a/src/drivers/net/netvsc.c b/src/drivers/net/netvsc.c
index d269cd63..5be52fb8 100644
--- a/src/drivers/net/netvsc.c
+++ b/src/drivers/net/netvsc.c
@@ -259,6 +259,15 @@ static int netvsc_revoke_buffer ( struct netvsc_device *netvsc,
struct netvsc_revoke_buffer_message msg;
int rc;
+ /* If the buffer's GPADL is obsolete (i.e. was created before
+ * the most recent Hyper-V reset), then we will never receive
+ * a response to the revoke message. Since the GPADL is
+ * already destroyed as far as the hypervisor is concerned, no
+ * further action is required.
+ */
+ if ( netvsc_is_obsolete ( netvsc ) )
+ return 0;
+
/* Construct message */
memset ( &msg, 0, sizeof ( msg ) );
msg.header.type = cpu_to_le32 ( buffer->revoke_type );
@@ -474,6 +483,14 @@ static int netvsc_transmit ( struct rndis_device *rndis,
uint64_t xid;
int rc;
+ /* If the device is obsolete (i.e. was opened before the most
+ * recent Hyper-V reset), then we will never receive transmit
+ * completions. Fail transmissions immediately to minimise
+ * the delay in closing and reopening the device.
+ */
+ if ( netvsc_is_obsolete ( netvsc ) )
+ return -EPIPE;
+
/* Sanity check */
assert ( iob_len ( iobuf ) >= sizeof ( *header ) );
assert ( iob_len ( iobuf ) == le32_to_cpu ( header->len ) );
@@ -824,6 +841,35 @@ static int netvsc_probe ( struct vmbus_device *vmdev ) {
}
/**
+ * Reset device
+ *
+ * @v vmdev VMBus device
+ * @ret rc Return status code
+ *
+ * Returns success immediately if the network device is not open.
+ * Otherwise closes and reopens the network device, returning any
+ * error reported by netdev_open().
+ */
+static int netvsc_reset ( struct vmbus_device *vmdev ) {
+ struct rndis_device *rndis = vmbus_get_drvdata ( vmdev );
+ struct netvsc_device *netvsc = rndis->priv;
+ struct net_device *netdev = rndis->netdev;
+ int rc;
+
+ /* A closed device holds no NetVSC (or RNDIS) state, so there
+ * is nothing to reset.
+ */
+ if ( ! netdev_is_open ( netdev ) )
+ return 0;
+
+ /* Close and reopen device to reset any stale state */
+ netdev_close ( netdev );
+ if ( ( rc = netdev_open ( netdev ) ) != 0 ) {
+ DBGC ( netvsc, "NETVSC %s could not reopen: %s\n",
+ netvsc->name, strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
* Remove device
*
* @v vmdev VMBus device
@@ -844,5 +890,6 @@ struct vmbus_driver netvsc_driver __vmbus_driver = {
.type = VMBUS_TYPE ( 0xf8615163, 0xdf3e, 0x46c5, 0x913f,
0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e ),
.probe = netvsc_probe,
+ .reset = netvsc_reset,
.remove = netvsc_remove,
};
diff --git a/src/drivers/net/netvsc.h b/src/drivers/net/netvsc.h
index 39eeb891..93192357 100644
--- a/src/drivers/net/netvsc.h
+++ b/src/drivers/net/netvsc.h
@@ -362,4 +362,19 @@ struct netvsc_device {
int wait_rc;
};
+/**
+ * Check if NetVSC device is obsolete
+ *
+ * @v netvsc NetVSC device
+ * @ret is_obsolete NetVSC device is obsolete
+ *
+ * Check if NetVSC device is obsolete (i.e. was opened before the most
+ * recent Hyper-V reset). The check is delegated to
+ * vmbus_gpadl_is_obsolete() using the GPADL ID held in
+ * netvsc->rx.gpadl.
+ */
+static inline __attribute__ (( always_inline )) int
+netvsc_is_obsolete ( struct netvsc_device *netvsc ) {
+
+ return vmbus_gpadl_is_obsolete ( netvsc->rx.gpadl );
+}
+
#endif /* _NETVSC_H */
diff --git a/src/include/ipxe/hyperv.h b/src/include/ipxe/hyperv.h
index c61e2a08..9194a976 100644
--- a/src/include/ipxe/hyperv.h
+++ b/src/include/ipxe/hyperv.h
@@ -61,6 +61,9 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
/** Synthetic interrupt vector mask */
#define HV_SINT_VECTOR_MASK HV_SINT_VECTOR ( 0xff )
+/** Maximum synthetic interrupt number */
+#define HV_SINT_MAX 15
+
/** Post message */
#define HV_POST_MESSAGE 0x005c
diff --git a/src/include/ipxe/vmbus.h b/src/include/ipxe/vmbus.h
index 26fc578c..68244185 100644
--- a/src/include/ipxe/vmbus.h
+++ b/src/include/ipxe/vmbus.h
@@ -479,6 +479,8 @@ struct vmbus_device {
/** Hyper-V hypervisor */
struct hv_hypervisor *hv;
+ /** Channel instance */
+ union uuid instance;
/** Channel ID */
unsigned int channel;
/** Monitor ID */
@@ -527,6 +529,12 @@ struct vmbus_driver {
* @ret rc Return status code
*/
int ( * probe ) ( struct vmbus_device *vmdev );
+ /** Reset device
+ *
+ * @v vmdev VMBus device
+ * @ret rc Return status code
+ */
+ int ( * reset ) ( struct vmbus_device *vmdev );
/** Remove device
*
* @v vmdev VMBus device
@@ -609,6 +617,23 @@ vmbus_unregister_pages ( struct vmbus_device *vmdev,
list_del ( &pages->list );
}
+extern unsigned int vmbus_obsolete_gpadl;
+
+/**
+ * Check if GPADL is obsolete
+ *
+ * @v gpadl GPADL ID
+ * @ret is_obsolete GPADL ID is obsolete
+ *
+ * Check if GPADL is obsolete (i.e. was created before the most recent
+ * Hyper-V reset). A GPADL ID is treated as obsolete when it is less
+ * than or equal to vmbus_obsolete_gpadl.
+ */
+static inline __attribute__ (( always_inline )) int
+vmbus_gpadl_is_obsolete ( unsigned int gpadl ) {
+
+ return ( gpadl <= vmbus_obsolete_gpadl );
+}
+
extern int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
size_t len );
extern int vmbus_gpadl_teardown ( struct vmbus_device *vmdev,
@@ -629,6 +654,7 @@ extern int vmbus_poll ( struct vmbus_device *vmdev );
extern void vmbus_dump_channel ( struct vmbus_device *vmdev );
extern int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent );
+extern int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent );
extern void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent );
#endif /* _IPXE_VMBUS_H */
diff --git a/src/interface/hyperv/vmbus.c b/src/interface/hyperv/vmbus.c
index 7915ddfe..45a7caec 100644
--- a/src/interface/hyperv/vmbus.c
+++ b/src/interface/hyperv/vmbus.c
@@ -50,6 +50,16 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
*/
#define VMBUS_GPADL_MAGIC 0x18ae0000
+/** Current (i.e. most recently issued) GPADL ID
+ *
+ * Starts at VMBUS_GPADL_MAGIC and is pre-incremented each time a new
+ * GPADL ID is allocated.
+ */
+static unsigned int vmbus_gpadl = VMBUS_GPADL_MAGIC;
+
+/** Obsolete GPADL ID threshold
+ *
+ * When the Hyper-V connection is reset, any previous GPADLs are
+ * automatically rendered obsolete. vmbus_reset() records the most
+ * recently issued GPADL ID here; IDs at or below this value are
+ * treated as obsolete.
+ */
+unsigned int vmbus_obsolete_gpadl;
+
/**
* Post message
*
@@ -281,12 +291,12 @@ int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
uint64_t pfn[pfn_count];
} __attribute__ (( packed )) gpadlhdr;
const struct vmbus_gpadl_created *created = &vmbus->message->created;
- static unsigned int gpadl = VMBUS_GPADL_MAGIC;
+ unsigned int gpadl;
unsigned int i;
int rc;
/* Allocate GPADL ID */
- gpadl++;
+ gpadl = ++vmbus_gpadl;
/* Construct message */
memset ( &gpadlhdr, 0, sizeof ( gpadlhdr ) );
@@ -347,6 +357,15 @@ int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, unsigned int gpadl ) {
const struct vmbus_gpadl_torndown *torndown = &vmbus->message->torndown;
int rc;
+ /* If GPADL is obsolete (i.e. was created before the most
+ * recent Hyper-V reset), then we will never receive a
+ * response to the teardown message. Since the GPADL is
+ * already destroyed as far as the hypervisor is concerned, no
+ * further action is required.
+ */
+ if ( vmbus_gpadl_is_obsolete ( gpadl ) )
+ return 0;
+
/* Construct message */
memset ( &teardown, 0, sizeof ( teardown ) );
teardown.header.type = cpu_to_le32 ( VMBUS_GPADL_TEARDOWN );
@@ -530,8 +549,7 @@ void vmbus_close ( struct vmbus_device *vmdev ) {
}
/* Tear down GPADL */
- if ( ( rc = vmbus_gpadl_teardown ( vmdev,
- vmdev->gpadl ) ) != 0 ) {
+ if ( ( rc = vmbus_gpadl_teardown ( vmdev, vmdev->gpadl ) ) != 0 ) {
DBGC ( vmdev, "VMBUS %s failed to tear down channel GPADL: "
"%s\n", vmdev->dev.name, strerror ( rc ) );
/* We can't prevent the remote VM from continuing to
@@ -1187,6 +1205,8 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv,
&parent->children );
vmdev->dev.parent = parent;
vmdev->hv = hv;
+ memcpy ( &vmdev->instance, &offer->instance,
+ sizeof ( vmdev->instance ) );
vmdev->channel = channel;
vmdev->monitor = offer->monitor;
vmdev->signal = ( offer->monitored ?
@@ -1201,6 +1221,7 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv,
} else if ( header->type ==
cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) {
+ /* End of offer list */
break;
} else {
@@ -1244,6 +1265,77 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv,
return rc;
}
+
+/**
+ * Reset channels
+ *
+ * @v hv Hyper-V hypervisor
+ * @v parent Parent device
+ * @ret rc Return status code
+ *
+ * Posts a VMBUS_REQUEST_OFFERS message and consumes the resulting
+ * channel offers until VMBUS_ALL_OFFERS_DELIVERED is received. Any
+ * other message type is rejected with -EPROTO. The offers themselves
+ * are only logged. Each child device of the parent then has its
+ * driver's reset method called. Failures to reset an individual
+ * device are logged but do not affect the return status.
+ */
+static int vmbus_reset_channels ( struct hv_hypervisor *hv,
+ struct device *parent ) {
+ struct vmbus *vmbus = hv->vmbus;
+ const struct vmbus_message_header *header = &vmbus->message->header;
+ const struct vmbus_offer_channel *offer = &vmbus->message->offer;
+ const union uuid *type;
+ struct vmbus_device *vmdev;
+ unsigned int channel;
+ int rc;
+
+ /* Post message */
+ if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0)
+ return rc;
+
+ /* Collect responses */
+ while ( 1 ) {
+
+ /* Wait for response */
+ if ( ( rc = vmbus_wait_for_any_message ( hv ) ) != 0 )
+ return rc;
+
+ /* Handle response */
+ if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) {
+
+ /* Parse offer (used for debug output only) */
+ type = &offer->type;
+ channel = le32_to_cpu ( offer->channel );
+ DBGC2 ( vmbus, "VMBUS %p offer %d type %s",
+ vmbus, channel, uuid_ntoa ( type ) );
+ if ( offer->monitored )
+ DBGC2 ( vmbus, " monitor %d", offer->monitor );
+ DBGC2 ( vmbus, "\n" );
+
+ /* Do nothing with the offer; we already have all
+ * of the relevant state from the initial probe.
+ */
+
+ } else if ( header->type ==
+ cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) {
+
+ /* End of offer list */
+ break;
+
+ } else {
+ DBGC ( vmbus, "VMBUS %p unexpected offer response type "
+ "%d\n", vmbus, le32_to_cpu ( header->type ) );
+ return -EPROTO;
+ }
+ }
+
+ /* Reset all devices.
+ *
+ * NOTE(review): the reset method is called unconditionally;
+ * this assumes every VMBus driver provides one - confirm.
+ */
+ list_for_each_entry ( vmdev, &parent->children, dev.siblings ) {
+ if ( ( rc = vmdev->driver->reset ( vmdev ) ) != 0 ) {
+ DBGC ( vmdev, "VMBUS %s could not reset: %s\n",
+ vmdev->dev.name, strerror ( rc ) );
+ /* Continue attempting to reset other devices */
+ continue;
+ }
+ }
+
+ return 0;
+}
+
/**
* Remove channels
*
@@ -1331,6 +1423,39 @@ int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ) {
}
/**
+ * Reset Hyper-V virtual machine bus
+ *
+ * @v hv Hyper-V hypervisor
+ * @v parent Parent device
+ * @ret rc Return status code
+ *
+ * Marks every GPADL issued so far as obsolete, zeroes the interrupt
+ * and monitor pages, enables the message synthetic interrupt,
+ * renegotiates the protocol version and finally resets the channels.
+ * Returns the first error from version negotiation or channel reset.
+ */
+int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent ) {
+ struct vmbus *vmbus = hv->vmbus;
+ int rc;
+
+ /* Mark all existent GPADLs as obsolete */
+ vmbus_obsolete_gpadl = vmbus_gpadl;
+
+ /* Clear interrupt and monitor pages */
+ memset ( vmbus->intr, 0, PAGE_SIZE );
+ memset ( vmbus->monitor_in, 0, PAGE_SIZE );
+ memset ( vmbus->monitor_out, 0, PAGE_SIZE );
+
+ /* Enable message interrupt */
+ hv_enable_sint ( hv, VMBUS_MESSAGE_SINT );
+
+ /* Renegotiate protocol version */
+ if ( ( rc = vmbus_negotiate_version ( hv ) ) != 0 )
+ return rc;
+
+ /* Reenumerate channels */
+ if ( ( rc = vmbus_reset_channels ( hv, parent ) ) != 0 )
+ return rc;
+
+ return 0;
+}
+
+/**
* Remove Hyper-V virtual machine bus
*
* @v hv Hyper-V hypervisor