diff options
author | Richard Henderson | 2021-10-19 16:41:04 +0200 |
---|---|---|
committer | Richard Henderson | 2021-10-19 16:41:04 +0200 |
commit | 50352cce138ef3b30c1cda28a4df68fff5da3202 (patch) | |
tree | cac0f47717ae21a351c720847fedb41425a10263 /migration/rdma.c | |
parent | Merge remote-tracking branch 'remotes/bsdimp/tags/pull-bsd-user-20211018-pull... (diff) | |
parent | migration/rdma: advise prefetch write for ODP region (diff) | |
download | qemu-50352cce138ef3b30c1cda28a4df68fff5da3202.tar.gz qemu-50352cce138ef3b30c1cda28a4df68fff5da3202.tar.xz qemu-50352cce138ef3b30c1cda28a4df68fff5da3202.zip |
Merge remote-tracking branch 'remotes/juanquintela/tags/migration.next-pull-request' into staging
Migration Pull request (3rd try)
Hi
This should fix all the freebsd problems.
Please apply,
# gpg: Signature made Tue 19 Oct 2021 02:28:51 AM PDT
# gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg: aka "Juan Quintela <quintela@trasno.org>" [full]
* remotes/juanquintela/tags/migration.next-pull-request:
migration/rdma: advise prefetch write for ODP region
migration/rdma: Try to register On-Demand Paging memory region
migration: allow enabling mutilfd for specific protocol only
migration: allow multifd for socket protocol only
migration/ram: Don't passs RAMState to migration_clear_memory_region_dirty_bitmap_*()
multifd: Unconditionally unregister yank function
multifd: Implement yank for multifd send side
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'migration/rdma.c')
-rw-r--r-- | migration/rdma.c | 115 |
1 files changed, 95 insertions, 20 deletions
```diff
diff --git a/migration/rdma.c b/migration/rdma.c
index 5c2d113aa9..2a3c7889b9 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1117,19 +1117,82 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
     return 0;
 }
 
+/* Check whether On-Demand Paging is supported by RDAM device */
+static bool rdma_support_odp(struct ibv_context *dev)
+{
+    struct ibv_device_attr_ex attr = {0};
+    int ret = ibv_query_device_ex(dev, NULL, &attr);
+    if (ret) {
+        return false;
+    }
+
+    if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * ibv_advise_mr to avoid RNR NAK error as far as possible.
+ * The responder mr registering with ODP will sent RNR NAK back to
+ * the requester in the face of the page fault.
+ */
+static void qemu_rdma_advise_prefetch_mr(struct ibv_pd *pd, uint64_t addr,
+                                         uint32_t len, uint32_t lkey,
+                                         const char *name, bool wr)
+{
+#ifdef HAVE_IBV_ADVISE_MR
+    int ret;
+    int advice = wr ? IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE :
+                 IBV_ADVISE_MR_ADVICE_PREFETCH;
+    struct ibv_sge sg_list = {.lkey = lkey, .addr = addr, .length = len};
+
+    ret = ibv_advise_mr(pd, advice,
+                        IBV_ADVISE_MR_FLAG_FLUSH, &sg_list, 1);
+    /* ignore the error */
+    if (ret) {
+        trace_qemu_rdma_advise_mr(name, len, addr, strerror(errno));
+    } else {
+        trace_qemu_rdma_advise_mr(name, len, addr, "successed");
+    }
+#endif
+}
+
 static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
 {
     int i;
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
     for (i = 0; i < local->nb_blocks; i++) {
+        int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
+
         local->block[i].mr =
             ibv_reg_mr(rdma->pd,
                     local->block[i].local_host_addr,
-                    local->block[i].length,
-                    IBV_ACCESS_LOCAL_WRITE |
-                    IBV_ACCESS_REMOTE_WRITE
+                    local->block[i].length, access
                     );
+
+        if (!local->block[i].mr &&
+            errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+                access |= IBV_ACCESS_ON_DEMAND;
+                /* register ODP mr */
+                local->block[i].mr =
+                    ibv_reg_mr(rdma->pd,
+                               local->block[i].local_host_addr,
+                               local->block[i].length, access);
+                trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
+
+                if (local->block[i].mr) {
+                    qemu_rdma_advise_prefetch_mr(rdma->pd,
+                                    (uintptr_t)local->block[i].local_host_addr,
+                                    local->block[i].length,
+                                    local->block[i].mr->lkey,
+                                    local->block[i].block_name,
+                                    true);
+                }
+        }
+
         if (!local->block[i].mr) {
             perror("Failed to register local dest ram block!");
             break;
@@ -1215,28 +1278,40 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
      */
     if (!block->pmr[chunk]) {
         uint64_t len = chunk_end - chunk_start;
+        int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
+                     0;
 
         trace_qemu_rdma_register_and_get_keys(len, chunk_start);
 
-        block->pmr[chunk] = ibv_reg_mr(rdma->pd,
-                chunk_start, len,
-                (rkey ? (IBV_ACCESS_LOCAL_WRITE |
-                        IBV_ACCESS_REMOTE_WRITE) : 0));
-
-        if (!block->pmr[chunk]) {
-            perror("Failed to register chunk!");
-            fprintf(stderr, "Chunk details: block: %d chunk index %d"
-                            " start %" PRIuPTR " end %" PRIuPTR
-                            " host %" PRIuPTR
-                            " local %" PRIuPTR " registrations: %d\n",
-                            block->index, chunk, (uintptr_t)chunk_start,
-                            (uintptr_t)chunk_end, host_addr,
-                            (uintptr_t)block->local_host_addr,
-                            rdma->total_registrations);
-            return -1;
+        block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+        if (!block->pmr[chunk] &&
+            errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+            access |= IBV_ACCESS_ON_DEMAND;
+            /* register ODP mr */
+            block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+            trace_qemu_rdma_register_odp_mr(block->block_name);
+
+            if (block->pmr[chunk]) {
+                qemu_rdma_advise_prefetch_mr(rdma->pd, (uintptr_t)chunk_start,
+                                            len, block->pmr[chunk]->lkey,
+                                            block->block_name, rkey);
+
+            }
         }
-        rdma->total_registrations++;
     }
+    if (!block->pmr[chunk]) {
+        perror("Failed to register chunk!");
+        fprintf(stderr, "Chunk details: block: %d chunk index %d"
+                        " start %" PRIuPTR " end %" PRIuPTR
+                        " host %" PRIuPTR
+                        " local %" PRIuPTR " registrations: %d\n",
+                        block->index, chunk, (uintptr_t)chunk_start,
+                        (uintptr_t)chunk_end, host_addr,
+                        (uintptr_t)block->local_host_addr,
+                        rdma->total_registrations);
+        return -1;
+    }
+    rdma->total_registrations++;
 
     if (lkey) {
         *lkey = block->pmr[chunk]->lkey;
```