author     Linus Torvalds  2019-03-10 00:53:03 +0100
committer  Linus Torvalds  2019-03-10 00:53:03 +0100
commit     a50243b1ddcdd766d0d17fbfeeb1a22e62fdc461
tree       3dbf847105558eaac3658a46c4934df503c866a2  /drivers/infiniband/hw/mlx5/mr.c
parent     Merge tag 'pci-v5.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git...
parent     net/mlx5: ODP support for XRC transport is not enabled by default in FW
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a slightly more active cycle than normal with ongoing
core changes and quite a lot of collected driver updates.
- Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe
- A new data transfer mode for HFI1 giving higher performance
- Significant functional and bug fix update to the mlx5
On-Demand-Paging MR feature
- A chip hang reset recovery system for hns
- Change mm->pinned_vm to an atomic64 (see the sketch after this list)
- Update bnxt_re to support a new 57500 chip
- A sane netlink 'rdma link add' method for creating rxe devices and
fixing the various unregistration race conditions in rxe's
unregister flow
- Allow lookup of objects by an ID over netlink
- Various reworking of the core to driver interface:
- drivers should not assume umem SGLs are in PAGE_SIZE chunks
- ucontext is accessed via udata not other means
- start to make the core code responsible for object memory
allocation
- drivers should convert struct device to struct ib_device via a
helper
- drivers have more tools to avoid use after unregister problems"
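
The mm->pinned_vm item above is the one core change that is simple to show in isolation. The snippet below is an illustrative sketch only, not code from this pull: the function name is invented, and it only shows the shape pinned-page accounting takes once mm->pinned_vm is an atomic64_t, using standard kernel primitives (atomic64_add_return(), atomic64_sub(), rlimit(), capable()).

#include <linux/atomic.h>
#include <linux/capability.h>
#include <linux/mm_types.h>
#include <linux/sched/signal.h>	/* rlimit() */

/*
 * Illustrative sketch (not from this series): charge npages against
 * mm->pinned_vm, and back the charge out again if it would exceed
 * RLIMIT_MEMLOCK for an unprivileged caller.
 */
static int sketch_account_pinned_pages(struct mm_struct *mm,
				       unsigned long npages)
{
	unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	u64 new_pinned;

	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(npages, &mm->pinned_vm);
		return -ENOMEM;
	}
	return 0;
}

The add-then-check-then-undo ordering is roughly why the counter was made atomic: the accounting no longer needs a lock just to update and compare a single counter.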
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits)
net/mlx5: ODP support for XRC transport is not enabled by default in FW
IB/hfi1: Close race condition on user context disable and close
RDMA/umem: Revert broken 'off by one' fix
RDMA/umem: minor bug fix in error handling path
RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp
cxgb4: kfree mhp after the debug print
IB/rdmavt: Fix concurrency panics in QP post_send and modify to error
IB/rdmavt: Fix loopback send with invalidate ordering
IB/iser: Fix dma_nents type definition
IB/mlx5: Set correct write permissions for implicit ODP MR
bnxt_re: Clean cq for kernel consumers only
RDMA/uverbs: Don't do double free of allocated PD
RDMA: Handle ucontext allocations by IB/core
RDMA/core: Fix a WARN() message
bnxt_re: fix the regression due to changes in alloc_pbl
IB/mlx4: Increase the timeout for CM cache
IB/core: Abort page fault handler silently during owning process exit
IB/mlx5: Validate correct PD before prefetch MR
IB/mlx5: Protect against prefetch of invalid MR
RDMA/uverbs: Store PR pointer before it is overwritten
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  126
1 file changed, 46 insertions(+), 80 deletions(-)
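
Most hunks in the diff below apply one conversion pattern: compile-time #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING blocks become IS_ENABLED() checks, so the ODP paths stay visible to the compiler in every configuration and are constant-folded away when the option is off. A minimal standalone sketch of the pattern (the demo_* names are invented for the example, not taken from the driver):

#include <linux/kconfig.h>	/* IS_ENABLED() */
#include <linux/srcu.h>

/* Stand-in for the per-device SRCU the ODP code waits on. */
DEFINE_STATIC_SRCU(demo_srcu);

/* Before: the wait disappears entirely when ODP is not configured. */
static void demo_sync_old(void)
{
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&demo_srcu);
#endif
}

/*
 * After: the branch is always compiled (and therefore always
 * build-tested); IS_ENABLED() folds to 0 when the option is off and
 * the compiler eliminates the call.
 */
static void demo_sync_new(void)
{
	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		synchronize_srcu(&demo_srcu);
}

The is_odp_mr() and mr->live hunks follow the same idea: keep a single copy of the code and gate the ODP-only work at run time.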
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index bf2b6ea23851..c85f00255884 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	/* Wait until all page fault handlers using the mr complete. */
-	synchronize_srcu(&dev->mr_srcu);
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		/* Wait until all page fault handlers using the mr complete. */
+		synchronize_srcu(&dev->mr_srcu);
 
 	return err;
 }
@@ -95,10 +94,9 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
 }
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 static void update_odp_mr(struct mlx5_ib_mr *mr)
 {
-	if (mr->umem->is_odp) {
+	if (is_odp_mr(mr)) {
 		/*
 		 * This barrier prevents the compiler from moving the
 		 * setting of umem->odp_data->private to point to our
@@ -121,7 +119,6 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
 		smp_wmb();
 	}
 }
-#endif
 
 static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
@@ -257,9 +254,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	synchronize_srcu(&dev->mr_srcu);
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		synchronize_srcu(&dev->mr_srcu);
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
 		list_del(&mr->list);
@@ -611,52 +607,27 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 	dev->cache.root = NULL;
 }
 
-static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
+	struct dentry *dir;
 	int i;
 
 	if (!mlx5_debugfs_root || dev->rep)
-		return 0;
+		return;
 
 	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
-	if (!cache->root)
-		return -ENOMEM;
 
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
-		ent->dir = debugfs_create_dir(ent->name, cache->root);
-		if (!ent->dir)
-			goto err;
-
-		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
-						 &size_fops);
-		if (!ent->fsize)
-			goto err;
-
-		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
-						  &limit_fops);
-		if (!ent->flimit)
-			goto err;
-
-		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
-					       &ent->cur);
-		if (!ent->fcur)
-			goto err;
-
-		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
-						&ent->miss);
-		if (!ent->fmiss)
-			goto err;
+		dir = debugfs_create_dir(ent->name, cache->root);
+		debugfs_create_file("size", 0600, dir, ent, &size_fops);
+		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+		debugfs_create_u32("cur", 0400, dir, &ent->cur);
+		debugfs_create_u32("miss", 0600, dir, &ent->miss);
 	}
-
-	return 0;
-err:
-	mlx5_mr_cache_debugfs_cleanup(dev);
-
-	return -ENOMEM;
 }
 
 static void delay_time_func(struct timer_list *t)
@@ -670,7 +641,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
-	int err;
 	int i;
 
 	mutex_init(&dev->slow_path_mutex);
@@ -715,14 +685,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		queue_work(cache->wq, &ent->work);
 	}
 
-	err = mlx5_mr_cache_debugfs_init(dev);
-	if (err)
-		mlx5_ib_warn(dev, "cache debugfs failure\n");
-
-	/*
-	 * We don't want to fail driver if debugfs failed to initialize,
-	 * so we are not forwarding error to the user.
-	 */
+	mlx5_mr_cache_debugfs_init(dev);
 
 	return 0;
 }
@@ -822,18 +785,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
 	return MLX5_MAX_UMR_SHIFT;
 }
 
-static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
-		       int access_flags, struct ib_umem **umem,
-		       int *npages, int *page_shift, int *ncont,
-		       int *order)
+static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+		       u64 start, u64 length, int access_flags,
+		       struct ib_umem **umem, int *npages, int *page_shift,
+		       int *ncont, int *order)
 {
-	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct ib_umem *u;
 	int err;
 
 	*umem = NULL;
 
-	u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
+	u = ib_umem_get(udata, start, length, access_flags, 0);
 	err = PTR_ERR_OR_ZERO(u);
 	if (err) {
 		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
@@ -1306,21 +1268,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (!start && length == U64_MAX) {
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
+	    length == U64_MAX) {
 		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
 		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
 			return ERR_PTR(-EINVAL);
 
-		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
 		if (IS_ERR(mr))
 			return ERR_CAST(mr);
 		return &mr->ibmr;
 	}
-#endif
 
-	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
-			  &page_shift, &ncont, &order);
+	err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
+			  &npages, &page_shift, &ncont, &order);
 	if (err < 0)
 		return ERR_PTR(err);
 
@@ -1361,9 +1322,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->umem = umem;
 	set_mr_fields(dev, mr, npages, length, access_flags);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	update_odp_mr(mr);
-#endif
 
 	if (!populate_mtts) {
 		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1380,9 +1339,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		}
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	mr->live = 1;
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		mr->live = 1;
+		atomic_set(&mr->num_pending_prefetch, 0);
+	}
+
 	return &mr->ibmr;
 error:
 	ib_umem_release(umem);
@@ -1470,8 +1431,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		flags |= IB_MR_REREG_TRANS;
 		ib_umem_release(mr->umem);
 		mr->umem = NULL;
-		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
-				  &npages, &page_shift, &ncont, &order);
+		err = mr_umem_get(dev, udata, addr, len, access_flags,
+				  &mr->umem, &npages, &page_shift, &ncont,
+				  &order);
 		if (err)
 			goto err;
 	}
@@ -1497,9 +1459,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		}
 
 		mr->allocated_from_cache = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-		mr->live = 1;
-#endif
+		if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+			mr->live = 1;
 	} else {
 		/*
 		 * Send a UMR WQE
@@ -1528,9 +1489,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 
 	set_mr_fields(dev, mr, npages, len, access_flags);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	update_odp_mr(mr);
-#endif
 
 	return 0;
 err:
@@ -1616,12 +1575,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	int npages = mr->npages;
 	struct ib_umem *umem = mr->umem;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (umem && umem->is_odp) {
+	if (is_odp_mr(mr)) {
 		struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
 
-		/* Prevent new page faults from succeeding */
+		/* Prevent new page faults and
+		 * prefetch requests from succeeding
+		 */
 		mr->live = 0;
+
+		/* dequeue pending prefetch requests for the mr */
+		if (atomic_read(&mr->num_pending_prefetch))
+			flush_workqueue(system_unbound_wq);
+		WARN_ON(atomic_read(&mr->num_pending_prefetch));
+
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
@@ -1641,7 +1607,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		/* Avoid double-freeing the umem. */
 		umem = NULL;
 	}
-#endif
+
 	clean_mr(dev, mr);
 
 	/*