From 9ccbfe14ddfce379ee24684b3648376b130293cd Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:00 +0000 Subject: postcopy: Add vhost-user flag for postcopy and check it Add a vhost feature flag for postcopy support, and use the postcopy notifier to check it before allowing postcopy. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'docs') diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index cb3a7595aa..91a572d781 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -290,6 +290,15 @@ Once the source has finished migration, rings will be stopped by the source. No further update must be done before rings are restarted. +In postcopy migration the slave is started before all the memory has been +received from the source host, and care must be taken to avoid accessing pages +that have yet to be received. The slave opens a 'userfault'-fd and registers +the memory with it; this fd is then passed back over to the master. +The master services requests on the userfaultfd for pages that are accessed +and when the page is available it performs WAKE ioctls on the userfaultfd +to wake the stalled slave. The slave indicates support for this via the +VHOST_USER_PROTOCOL_F_PAGEFAULT feature. + Memory access ------------- @@ -369,6 +378,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 Master message types -------------------- -- cgit v1.2.3-55-g7522 From d3dff7a5a1e0a6eff963fabc4d06879d060f34ee Mon Sep 17 00:00:00 2001 From: Dr.
David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:01 +0000 Subject: vhost-user: Add 'VHOST_USER_POSTCOPY_ADVISE' message Wire up a notifier to send a VHOST_USER_POSTCOPY_ADVISE message on an incoming advise. Later patches will fill in the behaviour/contents of the message. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 11 ++++++++ contrib/libvhost-user/libvhost-user.h | 3 +++ docs/interop/vhost-user.txt | 10 ++++++++ hw/virtio/vhost-user.c | 48 +++++++++++++++++++++++++++++++++++ migration/postcopy-ram.h | 1 + migration/savevm.c | 6 +++++ 6 files changed, 79 insertions(+) (limited to 'docs') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 2e358b5bce..37d4228193 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -86,6 +86,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_VRING_ENDIAN), REQ(VHOST_USER_GET_CONFIG), REQ(VHOST_USER_SET_CONFIG), + REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_MAX), }; #undef REQ @@ -856,6 +857,14 @@ vu_set_config(VuDev *dev, VhostUserMsg *vmsg) return false; } +static bool +vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg) +{ + /* TODO: Open ufd, pass it back in the request */ + vmsg->size = 0; + return true; /* = send a reply */ +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -927,6 +936,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_config(dev, vmsg); case VHOST_USER_NONE: break; + case VHOST_USER_POSTCOPY_ADVISE: + return vu_set_postcopy_advise(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 96db29c4ce..00d78a8810 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ 
b/contrib/libvhost-user/libvhost-user.h @@ -83,6 +83,9 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CREATE_CRYPTO_SESSION = 26, + VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_MAX } VhostUserRequest; diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 91a572d781..7854e50008 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -699,6 +699,16 @@ Master message types feature has been successfully negotiated. It's a required feature for crypto devices. + * VHOST_USER_POSTCOPY_ADVISE + Id: 28 + Master payload: N/A + Slave payload: userfault fd + + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, the + master advises slave that a migration with postcopy enabled is underway, + the slave must open a userfaultfd for later use. + Note that at this stage the migration is still in precopy mode. + Slave message types ------------------- diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index aab35c4845..ceb17b0554 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -78,6 +78,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_CONFIG = 25, VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_MAX } VhostUserRequest; @@ -795,6 +796,50 @@ out: return ret; } +/* + * Called at the start of an inbound postcopy on reception of the + * 'advise' command. 
+ */ +static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->chr; + int ufd; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_ADVISE, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_advise to vhost"); + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + error_setg(errp, "Failed to get postcopy_advise reply from vhost"); + return -1; + } + + if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { + error_setg(errp, "Unexpected msg type. Expected %d received %d", + VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); + return -1; + } + + if (msg.hdr.size) { + error_setg(errp, "Received bad msg size."); + return -1; + } + ufd = qemu_chr_fe_get_msgfd(chr); + if (ufd < 0) { + error_setg(errp, "%s: Failed to get ufd", __func__); + return -1; + } + + /* TODO: register ufd with userfault thread */ + return 0; +} + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, void *opaque) { @@ -814,6 +859,9 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, } break; + case POSTCOPY_NOTIFY_INBOUND_ADVISE: + return vhost_user_postcopy_advise(dev, pnd->errp); + default: /* We ignore notifications we don't know */ break; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 2e879bbacb..0421c98d57 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -130,6 +130,7 @@ void postcopy_infrastructure_init(void); */ enum PostcopyNotifyReason { POSTCOPY_NOTIFY_PROBE = 0, + POSTCOPY_NOTIFY_INBOUND_ADVISE, }; struct PostcopyNotifyData { diff --git a/migration/savevm.c b/migration/savevm.c index 358c5b51e2..1f2bf12a28 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1386,6 +1386,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); uint64_t 
remote_pagesize_summary, local_pagesize_summary, remote_tps; + Error *local_err = NULL; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1451,6 +1452,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, return -1; } + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) { + error_report_err(local_err); + return -1; + } + if (ram_postcopy_incoming_init(mis)) { return -1; } -- cgit v1.2.3-55-g7522 From 6864a7b5aced6d8d9b287b92db8d7a996ea2e8a3 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:06 +0000 Subject: vhost+postcopy: Transmit 'listen' to slave Notify the vhost-user slave on reception of the 'postcopy-listen' event from the source. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 19 +++++++++++++++++++ contrib/libvhost-user/libvhost-user.h | 2 ++ docs/interop/vhost-user.txt | 11 +++++++++++ hw/virtio/trace-events | 3 +++ hw/virtio/vhost-user.c | 34 ++++++++++++++++++++++++++++++++++ migration/postcopy-ram.h | 1 + migration/savevm.c | 7 +++++++ 7 files changed, 77 insertions(+) (limited to 'docs') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 9e31f47b7a..e53b1953df 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -98,6 +98,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_GET_CONFIG), REQ(VHOST_USER_SET_CONFIG), REQ(VHOST_USER_POSTCOPY_ADVISE), + REQ(VHOST_USER_POSTCOPY_LISTEN), REQ(VHOST_USER_MAX), }; #undef REQ @@ -931,6 +932,22 @@ out: return true; /* = send a reply */ } +static bool +vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) +{ + vmsg->payload.u64 = -1; + vmsg->size = sizeof(vmsg->payload.u64); + + if (dev->nregions) { + vu_panic(dev, "Regions already registered at postcopy-listen"); + return 
true; + } + dev->postcopy_listening = true; + + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; + vmsg->payload.u64 = 0; /* Success */ + return true; +} static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1004,6 +1021,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) break; case VHOST_USER_POSTCOPY_ADVISE: return vu_set_postcopy_advise(dev, vmsg); + case VHOST_USER_POSTCOPY_LISTEN: + return vu_set_postcopy_listen(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 074b7860f6..ed505cf0c1 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -86,6 +86,7 @@ typedef enum VhostUserRequest { VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_MAX } VhostUserRequest; @@ -285,6 +286,7 @@ struct VuDev { /* Postcopy data */ int postcopy_ufd; + bool postcopy_listening; }; typedef struct VuVirtqElement { diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 7854e50008..0d24203d31 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -709,6 +709,17 @@ Master message types the slave must open a userfaultfd for later use. Note that at this stage the migration is still in precopy mode. + * VHOST_USER_POSTCOPY_LISTEN + Id: 29 + Master payload: N/A + + Master advises slave that a transition to postcopy mode has happened. + The slave must ensure that shared memory is registered with userfaultfd + to cause faulting of non-present pages. + + This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and + thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported. 
+ Slave message types ------------------- diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 742ff0f90b..06ec03d6e7 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -6,6 +6,9 @@ vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 vhost_section(const char *name, int r) "%s:%d" +# hw/virtio/vhost-user.c +vhost_user_postcopy_listen(void) "" + # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 5900583437..c3ab2994fb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -20,6 +20,7 @@ #include "sysemu/cryptodev.h" #include "migration/migration.h" #include "migration/postcopy-ram.h" +#include "trace.h" #include #include @@ -79,6 +80,7 @@ typedef enum VhostUserRequest { VHOST_USER_CREATE_CRYPTO_SESSION = 26, VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_MAX } VhostUserRequest; @@ -172,6 +174,8 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; + /* True once we've entered postcopy_listen */ + bool postcopy_listen; }; static bool ioeventfd_enabled(void) @@ -858,6 +862,33 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) return 0; } +/* + * Called at the switch to postcopy on reception of the 'listen' command. 
+ */ +static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) +{ + struct vhost_user *u = dev->opaque; + int ret; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_POSTCOPY_LISTEN, + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + }; + u->postcopy_listen = true; + trace_vhost_user_postcopy_listen(); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + error_setg(errp, "Failed to send postcopy_listen to vhost"); + return -1; + } + + ret = process_message_reply(dev, &msg); + if (ret) { + error_setg(errp, "Failed to receive reply to postcopy_listen"); + return ret; + } + + return 0; +} + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, void *opaque) { @@ -880,6 +911,9 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, case POSTCOPY_NOTIFY_INBOUND_ADVISE: return vhost_user_postcopy_advise(dev, pnd->errp); + case POSTCOPY_NOTIFY_INBOUND_LISTEN: + return vhost_user_postcopy_listen(dev, pnd->errp); + default: /* We ignore notifications we don't know */ break; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index f21eef6702..c8ced3470b 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -131,6 +131,7 @@ void postcopy_infrastructure_init(void); enum PostcopyNotifyReason { POSTCOPY_NOTIFY_PROBE = 0, POSTCOPY_NOTIFY_INBOUND_ADVISE, + POSTCOPY_NOTIFY_INBOUND_LISTEN, }; struct PostcopyNotifyData { diff --git a/migration/savevm.c b/migration/savevm.c index 1f2bf12a28..305c3ceaf5 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1618,6 +1618,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); trace_loadvm_postcopy_handle_listen(); + Error *local_err = NULL; + if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); return -1; @@ -1643,6 +1645,11 @@ static int 
loadvm_postcopy_handle_listen(MigrationIncomingState *mis) } } + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { + error_report_err(local_err); + return -1; + } + if (mis->have_listen_thread) { error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread"); return -1; -- cgit v1.2.3-55-g7522 From 9bb38019942c2f3f44b98f5830e369faec701e55 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:10 +0000 Subject: vhost+postcopy: Send address back to qemu We need a better way, but at the moment we need the address of the mappings sent back to qemu so it can interpret the messages on the userfaultfd it reads. This is done as a 3 stage set: QEMU -> client set_mem_table mmap stuff, get addresses client -> qemu here are the addresses qemu -> client OK - now you can use them That ensures that qemu has registered the new addresses in its userfault code before the client starts accessing them. Note: We don't ask for the default 'ack' reply since we've got our own. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++- docs/interop/vhost-user.txt | 9 +++++ hw/virtio/trace-events | 1 + hw/virtio/vhost-user.c | 67 +++++++++++++++++++++++++++++++++-- 4 files changed, 98 insertions(+), 3 deletions(-) (limited to 'docs') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 7c8cd5878e..6314549b65 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -491,10 +491,32 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) dev_region->mmap_addr); } + /* Return the address to QEMU so that it can translate the ufd + * fault addresses back.
+ */ + msg_region->userspace_addr = (uintptr_t)(mmap_addr + + dev_region->mmap_offset); close(vmsg->fds[i]); } - /* TODO: Get address back to QEMU */ + /* Send the message back to qemu with the addresses filled in */ + vmsg->fd_num = 0; + if (!vu_message_write(dev, dev->sock, vmsg)) { + vu_panic(dev, "failed to respond to set-mem-table for postcopy"); + return false; + } + + /* Wait for QEMU to confirm that it's registered the handler for the + * faults. + */ + if (!vu_message_read(dev, dev->sock, vmsg) || + vmsg->size != sizeof(vmsg->payload.u64) || + vmsg->payload.u64 != 0) { + vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table"); + return false; + } + + /* OK, now we can go and register the memory and generate faults */ for (i = 0; i < dev->nregions; i++) { VuDevRegion *dev_region = &dev->regions[i]; #ifdef UFFDIO_REGISTER diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 0d24203d31..e295ef12ca 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -455,12 +455,21 @@ Master message types Id: 5 Equivalent ioctl: VHOST_SET_MEM_TABLE Master payload: memory regions description + Slave payload: (postcopy only) memory regions description Sets the memory map regions on the slave so it can translate the vring addresses. In the ancillary data there is an array of file descriptors for each memory mapped region. The size and ordering of the fds matches the number and ordering of memory regions. + When VHOST_USER_POSTCOPY_LISTEN has been received, SET_MEM_TABLE replies with + the bases of the memory mapped regions to the master. The slave must + have mmap'd the regions but not yet accessed them and should not yet generate + a userfault event. Note NEED_REPLY_MASK is not set in this case. + QEMU will then reply back to the list of mappings with an empty + VHOST_USER_SET_MEM_TABLE as an acknowledgment; only upon reception of this + message may the guest start accessing the memory and generating faults. 
+ * VHOST_USER_SET_LOG_BASE Id: 6 diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 06ec03d6e7..05d18ada77 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -8,6 +8,7 @@ vhost_section(const char *name, int r) "%s:%d" # hw/virtio/vhost-user.c vhost_user_postcopy_listen(void) "" +vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b6757ebae3..1603d70bea 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -174,6 +174,7 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; + uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; /* True once we've entered postcopy_listen */ bool postcopy_listen; }; @@ -343,12 +344,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, struct vhost_memory *mem) { + struct vhost_user *u = dev->opaque; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); - /* TODO: Add actual postcopy differences */ + VhostUserMsg msg_reply; + int region_i, msg_i; + VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_MEM_TABLE, .hdr.flags = VHOST_USER_VERSION, @@ -395,6 +399,64 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, return -1; } + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." 
+ "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + /* We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); + + /* They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + /* Now we've registered this with the postcopy code, we ack to the client, + * because now we're in the position to be able to deal with any faults + * it generates. 
+ */ + /* TODO: Use this for failure cases as well with a bad value */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + if (reply_supported) { return process_message_reply(dev, &msg); } @@ -411,7 +473,8 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, size_t fd_num = 0; bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); + VHOST_USER_PROTOCOL_F_REPLY_ACK) && + !do_postcopy; if (do_postcopy) { /* Postcopy has enough differences that it's best done in it's own -- cgit v1.2.3-55-g7522 From c639187e3342cb14e100d14ce4854444f7ae98d5 Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:19 +0000 Subject: vhost-user: Add VHOST_USER_POSTCOPY_END message This message is sent just before the end of postcopy to get the client to stop using userfault since we won't respond to any more requests. It should close userfaultfd so that any other pages get mapped to the backing file automatically by the kernel, since at this point we know we've received everything. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- contrib/libvhost-user/libvhost-user.c | 23 +++++++++++++++++++++++ contrib/libvhost-user/libvhost-user.h | 1 + docs/interop/vhost-user.txt | 12 ++++++++++++ hw/virtio/vhost-user.c | 1 + 4 files changed, 37 insertions(+) (limited to 'docs') diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 5feed52098..504ff5ea59 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -99,6 +99,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_CONFIG), REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_POSTCOPY_LISTEN), + REQ(VHOST_USER_POSTCOPY_END), REQ(VHOST_USER_MAX), }; #undef REQ @@ -1094,6 +1095,26 @@ vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) vmsg->payload.u64 = 0; /* Success */ return true; } + +static bool +vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) +{ + DPRINT("%s: Entry\n", __func__); + dev->postcopy_listening = false; + if (dev->postcopy_ufd > 0) { + close(dev->postcopy_ufd); + dev->postcopy_ufd = -1; + DPRINT("%s: Done close\n", __func__); + } + + vmsg->fd_num = 0; + vmsg->payload.u64 = 0; + vmsg->size = sizeof(vmsg->payload.u64); + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; + DPRINT("%s: exit\n", __func__); + return true; +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1169,6 +1190,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_postcopy_advise(dev, vmsg); case VHOST_USER_POSTCOPY_LISTEN: return vu_set_postcopy_listen(dev, vmsg); + case VHOST_USER_POSTCOPY_END: + return vu_set_postcopy_end(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index ed505cf0c1..79f7a53ee8 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -87,6 +87,7 @@ typedef enum VhostUserRequest { VHOST_USER_CLOSE_CRYPTO_SESSION 
= 27, VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, VHOST_USER_MAX } VhostUserRequest; diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index e295ef12ca..c058c407df 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -729,6 +729,18 @@ Master message types This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported. + * VHOST_USER_POSTCOPY_END + Id: 30 + Slave payload: u64 + + Master advises that postcopy migration has now completed. The + slave must disable the userfaultfd. The response is an acknowledgement + only. + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, this message + is sent at the end of the migration, after VHOST_USER_POSTCOPY_LISTEN + was previously sent. + The value returned is an error indication; 0 is success. + Slave message types ------------------- diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index a785aefd3e..230f2f9d55 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -82,6 +82,7 @@ typedef enum VhostUserRequest { VHOST_USER_CLOSE_CRYPTO_SESSION = 27, VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, VHOST_USER_MAX } VhostUserRequest; -- cgit v1.2.3-55-g7522 From 1dc61e7b37d339c42ec9bd7a7eec1ef2c22f351c Mon Sep 17 00:00:00 2001 From: Dr. David Alan Gilbert Date: Mon, 12 Mar 2018 17:21:24 +0000 Subject: postcopy shared docs Add some notes to the migration documentation for shared memory postcopy. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- docs/devel/migration.rst | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'docs') diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 9d1b7657f0..e32b087f6e 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -577,3 +577,44 @@ Postcopy now works with hugetlbfs backed memory: hugepages works well, however 1GB hugepages are likely to be problematic since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, and until the full page is transferred the destination thread is blocked. + +Postcopy with shared memory +--------------------------- + +Postcopy migration with shared memory needs explicit support from the other +processes that share memory and from QEMU. There are restrictions on the type of +shared memory that userfault can support. + +The Linux kernel userfault support works on `/dev/shm` memory and on `hugetlbfs` +(although the kernel doesn't provide an equivalent to `madvise(MADV_DONTNEED)` +for hugetlbfs which may be a problem in some configurations). + +The vhost-user code in QEMU supports clients that have Postcopy support, +and the `vhost-user-bridge` (in `tests/`) and the DPDK package have changes +to support postcopy. + +The client needs to open a userfaultfd and register the areas +of memory that it maps with userfault. The client must then pass the +userfaultfd back to QEMU together with a mapping table that allows +fault addresses in the client's address space to be converted back to +RAMBlock/offsets. The client's userfaultfd is added to the postcopy +fault-thread and page requests are made on behalf of the client by QEMU. +QEMU performs 'wake' operations on the client's userfaultfd to allow it +to continue after a page has arrived. + +..
note:: + There are two future improvements that would be nice: + a) Some way to make QEMU ignorant of the addresses in the client's + address space + b) Avoiding the need for QEMU to perform ufd-wake calls after the + pages have arrived + +Retro-fitting postcopy to existing clients is possible: + a) A mechanism is needed for the registration with userfault as above, + and the registration needs to be coordinated with the phases of + postcopy. In vhost-user extra messages are added to the existing + control channel. + b) Any thread that can block due to guest memory accesses must be + identified and the implications understood; for example if the + guest memory access is made while holding a lock then all other + threads waiting for that lock will also be blocked. -- cgit v1.2.3-55-g7522