From ea4baf7f116a18382df331db2123d98bc1c3cd83 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 18 Dec 2018 14:28:30 +0200 Subject: RDMA: Rename port_callback to init_port Most provider routines are callback routines which ib core invokes. _callback suffix doesn't convey information about when such callback is invoked. Therefore, rename port_callback to init_port. Additionally, store the init_port function pointer in ib_device_ops, so that it can be accessed in subsequent patches when binding rdma device to net namespace. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8872453e26c0..66867e92ddea 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -574,9 +574,7 @@ port_cleanup: * callback for each device that is added. @device must be allocated * with ib_alloc_device(). */ -int ib_register_device(struct ib_device *device, const char *name, - int (*port_callback)(struct ib_device *, u8, - struct kobject *)) +int ib_register_device(struct ib_device *device, const char *name) { int ret; struct ib_client *client; @@ -613,7 +611,7 @@ int ib_register_device(struct ib_device *device, const char *name, goto dev_cleanup; } - ret = ib_device_register_sysfs(device, port_callback); + ret = ib_device_register_sysfs(device); if (ret) { dev_warn(&device->dev, "Couldn't register device with driver model\n"); @@ -1283,6 +1281,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); + SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); -- cgit v1.2.3-55-g7522 From 54747231150f0dddf68f2ee29ec2970fcc433909 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 18 Dec 2018 14:15:56 +0200 Subject: RDMA: Introduce and use rdma_device_to_ibdev() Introduce and use rdma_device_to_ibdev() API for those drivers which are registering one sysfs group and also use in ib_core. In subsequent patch, device->provider_ibdev one-to-one mapping is no longer holds true during accessing sysfs entries. Therefore, introduce an API rdma_device_to_ibdev() that provides such information. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 -- drivers/infiniband/core/sysfs.c | 12 ++++++------ drivers/infiniband/hw/bnxt_re/main.c | 6 ++++-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 14 ++++++++------ drivers/infiniband/hw/cxgb4/provider.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/sysfs.c | 16 ++++++++-------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 5 ++--- drivers/infiniband/hw/mlx4/main.c | 7 ++++--- drivers/infiniband/hw/mlx5/main.c | 13 ++++++++----- drivers/infiniband/hw/mthca/mthca_provider.c | 9 ++++++--- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 6 ++++-- drivers/infiniband/hw/qedr/main.c | 3 ++- drivers/infiniband/hw/qib/qib_sysfs.c | 18 +++++++++--------- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 26 +++++++++++--------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 ++-- include/rdma/ib_verbs.h | 23 +++++++++++++++++++++++ 17 files changed, 106 insertions(+), 74 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 66867e92ddea..f8180cf1a004 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -296,8 +296,6 @@ struct ib_device *ib_alloc_device(size_t size) device->dev.class = &ib_class; device_initialize(&device->dev); - dev_set_drvdata(&device->dev, device); - INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); rwlock_init(&device->client_data_lock); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a5679933df6..c75692802da8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1187,7 +1187,7 @@ err_put: static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); @@ -1204,7 +1204,7 @@ static DEVICE_ATTR_RO(node_type); static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), @@ -1217,7 +1217,7 @@ static DEVICE_ATTR_RO(sys_image_guid); static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->node_guid)[0]), @@ -1230,7 +1230,7 @@ static DEVICE_ATTR_RO(node_guid); static ssize_t node_desc_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%.64s\n", dev->node_desc); } @@ -1239,7 +1239,7 @@ static ssize_t node_desc_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); struct ib_device_modify desc = {}; int ret; @@ -1258,7 +1258,7 @@ static DEVICE_ATTR_RW(node_desc); static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); ib_get_device_fw_str(dev, buf); strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 797a3e943366..16eecfa5882c 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -538,7 +538,8 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev) static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); + struct bnxt_re_dev *rdev = + rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); } @@ -547,7 +548,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); + struct bnxt_re_dev *rdev = + rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ffdde3cca268..07c20cd07f33 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1130,8 +1130,9 @@ static int iwch_query_port(struct ib_device *ibdev, static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); + pr_debug("%s dev 0x%p\n", __func__, dev); return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } @@ -1140,8 +1141,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); struct ethtool_drvinfo info; struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; @@ -1154,8 +1155,9 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); + pr_debug("%s dev 0x%p\n", __func__, dev); return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, iwch_dev->rdev.rnic_info.pdev->device); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0a99894b0160..f977f8e7e162 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -376,8 +376,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%d\n", CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); @@ -387,8 +388,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); struct ethtool_drvinfo info; struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0]; @@ -401,8 +402,9 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, c4iw_dev->rdev.lldi.pdev->device); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 2be513d4c9da..90f62c4bddba 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -498,7 +498,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -508,7 +508,7 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -524,7 +524,7 @@ static ssize_t boardversion_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -536,7 +536,7 @@ static ssize_t nctxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* @@ -555,7 +555,7 @@ static ssize_t nfreectxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ @@ -567,7 +567,7 @@ static ssize_t serial_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); @@ -579,7 +579,7 @@ static ssize_t chip_reset_store(struct device *device, size_t count) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -609,7 +609,7 @@ static ssize_t tempsense_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); struct hfi1_temp temp; int ret; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index af66ab9d150b..12b31a8440be 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2139,9 +2139,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct i40iw_ib_device *iwibdev = container_of(dev, - struct i40iw_ib_device, - ibdev.dev); + struct i40iw_ib_device *iwibdev = + rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; return sprintf(buf, "%x\n", hw_rev); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c3f950d82ed0..dc2ffd293a11 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2043,7 +2043,7 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2052,7 +2052,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); return sprintf(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2061,7 +2061,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); + return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 948617a60d44..4b1b56d54301 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4104,7 +4104,7 @@ static ssize_t fw_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } @@ -4114,7 +4114,7 @@ static ssize_t reg_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } @@ -4124,7 +4124,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -4133,7 +4134,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -4142,7 +4144,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 3473c6c51b92..63003b4d2485 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1081,7 +1081,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + return sprintf(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -1090,7 +1091,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + switch (dev->pdev->device) { case PCI_DEVICE_ID_MELLANOX_TAVOR: return sprintf(buf, "MT23108\n"); @@ -1111,7 +1113,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 34601f0cbd74..034156f7e9ed 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2560,7 +2560,7 @@ static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev.dev); + rdma_device_to_drv_device(dev, struct nes_ib_device, ibdev); struct nes_vnic *nesvnic = nesibdev->nesvnic; nes_debug(NES_DBG_INIT, "\n"); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index f45b996f617f..b0491b9ecfe4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -118,7 +118,8 @@ static void get_dev_fw_str(struct ib_device *device, char *str) static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ocrdma_dev *dev = dev_get_drvdata(device); + struct ocrdma_dev *dev = + rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); } @@ -127,7 +128,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ocrdma_dev *dev = dev_get_drvdata(device); + struct ocrdma_dev *dev = + rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); } diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 8e5c76d06855..f85e72b65a10 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -137,7 +137,8 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct qedr_dev *dev = dev_get_drvdata(device); + struct qedr_dev *dev = + rdma_device_to_drv_device(device, struct qedr_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); } diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 1cf4ca3f23e3..905206a0c2d5 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -555,7 +555,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -565,7 +565,7 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -590,7 +590,7 @@ static ssize_t boardversion_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -602,7 +602,7 @@ static ssize_t localbus_info_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -614,7 +614,7 @@ static ssize_t nctxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. */ @@ -630,7 +630,7 @@ static ssize_t nfreectxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ @@ -642,7 +642,7 @@ static ssize_t serial_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); buf[sizeof(dd->serial)] = '\0'; @@ -657,7 +657,7 @@ static ssize_t chip_reset_store(struct device *device, size_t count) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -679,7 +679,7 @@ static ssize_t tempsense_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; int idx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index a7e4b2ccfaf8..c85d48ae7442 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -50,7 +50,7 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = - container_of(device, struct usnic_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); unsigned short subsystem_device_id; mutex_lock(&us_ibdev->usdev_lock); @@ -67,14 +67,13 @@ static DEVICE_ATTR_RO(board_id); static ssize_t config_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); char *ptr; unsigned left; unsigned n; enum usnic_vnic_res_type res_type; - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); - /* Buffer space limit is 1 page */ ptr = buf; left = PAGE_SIZE; @@ -130,9 +129,8 @@ static DEVICE_ATTR_RO(config); static ssize_t iface_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%s\n", netdev_name(us_ibdev->netdev)); @@ -142,9 +140,8 @@ static DEVICE_ATTR_RO(iface); static ssize_t max_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%u\n", kref_read(&us_ibdev->vf_cnt)); @@ -154,10 +151,10 @@ static DEVICE_ATTR_RO(max_vf); static ssize_t qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); int qp_per_vf; - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); @@ -169,9 +166,8 @@ static DEVICE_ATTR_RO(qp_per_vf); static ssize_t cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%d\n", us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 43171148e9c5..3d01247a28db 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1129,8 +1129,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { - struct rxe_dev *rxe = container_of(device, struct rxe_dev, - ib_dev.dev); + struct rxe_dev *rxe = + rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1d1902fd9f87..94b6e1dd4dab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4241,4 +4241,27 @@ rdma_set_device_sysfs_group(struct ib_device *dev, dev->groups[1] = group; } +/** + * rdma_device_to_ibdev - Get ib_device pointer from device pointer + * + * @device: device pointer for which ib_device pointer to retrieve + * + * rdma_device_to_ibdev() retrieves ib_device pointer from device. + * + */ +static inline struct ib_device *rdma_device_to_ibdev(struct device *device) +{ + return container_of(device, struct ib_device, dev); +} + +/** + * rdma_device_to_drv_device - Helper macro to reach back to driver's + * ib_device holder structure from device pointer. + * + * NOTE: New drivers should not make use of this API; This API is only for + * existing drivers who have exposed sysfs entries using + * rdma_set_device_sysfs_group(). + */ +#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ + container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) #endif /* IB_VERBS_H */ -- cgit v1.2.3-55-g7522 From 344684e6d02ff21ff2fce8cf1b2de3fd3cafcac7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 12 Jan 2019 02:42:42 +0000 Subject: RDMA/device: Use __ib_device_get_by_name() in ib_device_rename() No reason to open code this loop. Signed-off-by: Jason Gunthorpe Reviewed-by: Steve Wise --- drivers/infiniband/core/device.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f8180cf1a004..4a9aa6d10c5e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -183,18 +183,15 @@ static struct ib_device *__ib_device_get_by_name(const char *name) int ib_device_rename(struct ib_device *ibdev, const char *name) { - struct ib_device *device; int ret = 0; if (!strcmp(name, dev_name(&ibdev->dev))) return ret; mutex_lock(&device_mutex); - list_for_each_entry(device, &device_list, core_list) { - if (!strcmp(name, dev_name(&device->dev))) { - ret = -EEXIST; - goto out; - } + if (__ib_device_get_by_name(name)) { + ret = -EEXIST; + goto out; } ret = device_rename(&ibdev->dev, name); -- cgit v1.2.3-55-g7522 From 7527a7b157d1191b23562ed70154ae93bd65f845 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 17 Jan 2019 20:14:15 +0200 Subject: IB/core: Simplify rdma cgroup registration RDMA cgroup registration routine always returns success, so simplify function to be void and run clang formatter over whole CONFIG_CGROUP_RDMA art of core_priv.h. This reduces unwinding error path for regular registration and future net namespace change functionality for rdma device. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Tejun Heo Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cgroup.c | 5 ++--- drivers/infiniband/core/core_priv.h | 17 +++++++++++------ drivers/infiniband/core/device.c | 8 +------- include/linux/cgroup_rdma.h | 2 +- kernel/cgroup/rdma.c | 5 +---- 5 files changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c index 126ac5f99db7..388fd04e5f63 100644 --- a/drivers/infiniband/core/cgroup.c +++ b/drivers/infiniband/core/cgroup.c @@ -21,12 +21,11 @@ * Register with the rdma cgroup. Should be called before * exposing rdma device to user space applications to avoid * resource accounting leak. - * Returns 0 on success or otherwise failure code. */ -int ib_device_register_rdmacg(struct ib_device *device) +void ib_device_register_rdmacg(struct ib_device *device) { device->cg_device.name = device->name; - return rdmacg_register_device(&device->cg_device); + rdmacg_register_device(&device->cg_device); } /** diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index aca75c74e451..42a49982f66e 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -115,7 +115,7 @@ void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); #ifdef CONFIG_CGROUP_RDMA -int ib_device_register_rdmacg(struct ib_device *device); +void ib_device_register_rdmacg(struct ib_device *device); void ib_device_unregister_rdmacg(struct ib_device *device); int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, @@ -126,21 +126,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index); #else -static inline int ib_device_register_rdmacg(struct ib_device *device) -{ return 0; } +static inline void ib_device_register_rdmacg(struct ib_device *device) +{ +} static inline void ib_device_unregister_rdmacg(struct ib_device *device) -{ } +{ +} static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ return 0; } +{ + return 0; +} static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ } +{ +} #endif static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 4a9aa6d10c5e..200431c540f2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -599,12 +599,7 @@ int ib_register_device(struct ib_device *device, const char *name) device->index = __dev_new_index(); - ret = ib_device_register_rdmacg(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't register device with rdma cgroup\n"); - goto dev_cleanup; - } + ib_device_register_rdmacg(device); ret = ib_device_register_sysfs(device); if (ret) { @@ -627,7 +622,6 @@ int ib_register_device(struct ib_device *device, const char *name) cg_cleanup: ib_device_unregister_rdmacg(device); -dev_cleanup: cleanup_device(device); out: mutex_unlock(&device_mutex); diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index e94290b29e99..ef1bae2983f3 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -39,7 +39,7 @@ struct rdmacg_device { * APIs for RDMA/IB stack to publish when a device wants to * participate in resource accounting */ -int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_register_device(struct rdmacg_device *device); void rdmacg_unregister_device(struct rdmacg_device *device); /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index d3bbb757ee49..1d75ae7f1cb7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge); * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. - * Returns 0 on success or EINVAL when table length given is beyond - * supported size. */ -int rdmacg_register_device(struct rdmacg_device *device) +void rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); @@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device) mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); - return 0; } EXPORT_SYMBOL(rdmacg_register_device); -- cgit v1.2.3-55-g7522 From 459cc69fa4c17caf21de596693d8a07170820a58 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:49:11 +0200 Subject: RDMA: Provide safe ib_alloc_device() function All callers to ib_alloc_device() provide a larger size than struct ib_device and rely on the fact that struct ib_device is embedded in their driver specific structure as the first member. Provide a safer variant of ib_alloc_device() that checks and enforces this approach to make sure the drivers are using it right. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 +++--- drivers/infiniband/hw/bnxt_re/main.c | 2 +- drivers/infiniband/hw/cxgb3/iwch.c | 2 +- drivers/infiniband/hw/cxgb4/device.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx5/ib_rep.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mthca/mthca_main.c | 2 +- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/qedr/main.c | 2 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 +- drivers/infiniband/sw/rdmavt/vt.c | 2 +- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- include/rdma/ib_verbs.h | 8 +++++++- 19 files changed, 27 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 200431c540f2..b511cfa00bdb 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -268,7 +268,7 @@ static struct class ib_class = { }; /** - * ib_alloc_device - allocate an IB device struct + * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate * * Low-level drivers should use ib_alloc_device() to allocate &struct @@ -277,7 +277,7 @@ static struct class ib_class = { * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). */ -struct ib_device *ib_alloc_device(size_t size) +struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; @@ -303,7 +303,7 @@ struct ib_device *ib_alloc_device(size_t size) return device; } -EXPORT_SYMBOL(ib_alloc_device); +EXPORT_SYMBOL(_ib_alloc_device); /** * ib_dealloc_device - free an IB device struct diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 16eecfa5882c..08777506d0b1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -688,7 +688,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, struct bnxt_re_dev *rdev; /* Allocate bnxt_re_dev instance here */ - rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev)); + rdev = ib_alloc_device(bnxt_re_dev, ibdev); if (!rdev) { dev_err(NULL, "%s: bnxt_re_dev allocation failure!", ROCE_DRV_MODULE_NAME); diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 591de319c178..fb03bc492ef7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev) pr_debug("%s t3cdev %p\n", __func__, tdev); pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); + rnicp = ib_alloc_device(iwch_dev, ibdev); if (!rnicp) { pr_err("Cannot allocate ib device\n"); return; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 9c10fff6dcfb..4b4e2464b705 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -966,7 +966,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pr_info("%s: On-Chip Queues not supported on this device\n", pci_name(infop->pdev)); - devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); + devp = ib_alloc_device(c4iw_dev, ibdev); if (!devp) { pr_err("Cannot allocate ib device\n"); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b74c742b000c..fa08c22aad66 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -5002,7 +5002,7 @@ static int hns_roce_probe(struct platform_device *pdev) struct hns_roce_dev *hr_dev; struct device *dev = &pdev->dev; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5c483b437bdd..48a5d6548cd4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6059,7 +6059,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) struct hns_roce_dev *hr_dev; int ret; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 12b31a8440be..d4ab46dd9e6c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2762,7 +2762,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev struct net_device *netdev = iwdev->netdev; struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; - iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev)); + iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { i40iw_pr_err("iwdev == NULL\n"); return NULL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index dc2ffd293a11..d66002a31000 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2635,7 +2635,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (num_ports == 0) return NULL; - ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); + ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev); if (!ibdev) { dev_err(&dev->persist->pdev->dev, "Device struct alloc failed\n"); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..6d7b8bad4b61 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -70,7 +70,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *ibdev; - ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 96bf1b2f9dd7..8161acda64e6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6508,7 +6508,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 92c49bff22bc..fe9654a7af71 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -961,7 +961,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) /* We can handle large RDMA requests, so allow larger segments. */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); - mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); + mdev = ib_alloc_device(mthca_dev, ib_dev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 034156f7e9ed..6eb991d40035 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3669,7 +3669,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device)); + nesibdev = ib_alloc_device(nes_ib_device, ibdev); if (nesibdev == NULL) { return NULL; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b0491b9ecfe4..88970a6bb555 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -297,7 +297,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) u8 lstate = 0; struct ocrdma_dev *dev; - dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); + dev = ib_alloc_device(ocrdma_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f85e72b65a10..878e9e23652b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -853,7 +853,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, struct qedr_dev *dev; int rc = 0; - dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(qedr_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 3201dd1899c7..0c0a288cb585 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -372,7 +372,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) usnic_dbg("\n"); netdev = pci_get_drvdata(dev); - us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); + us_ibdev = ib_alloc_device(usnic_ib_dev, ib_dev); if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index a5f02276d903..e582beaf9430 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -795,7 +795,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); /* Allocate zero-out device */ - dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(pvrdma_dev, ib_dev); if (!dev) { dev_err(&pdev->dev, "failed to allocate IB device\n"); return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7de7389d0235..b3f0c5578925 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { struct rvt_dev_info *rdi; - rdi = (struct rvt_dev_info *)ib_alloc_device(size); + rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev); if (!rdi) return rdi; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 8fd03ae20efc..19f3c69916b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -555,7 +555,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) int err; struct rxe_dev *rxe = NULL; - rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe)); + rxe = ib_alloc_device(rxe_dev, ib_dev); if (!rxe) return NULL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 71ea144ec823..a1a1e710642c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2621,7 +2621,13 @@ struct ib_client { struct list_head list; }; -struct ib_device *ib_alloc_device(size_t size); +struct ib_device *_ib_alloc_device(size_t size); +#define ib_alloc_device(drv_struct, member) \ + container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct drv_struct, member))), \ + struct drv_struct, member) + void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -- cgit v1.2.3-55-g7522 From 6780c4fa9d6e091b2f206ac429a40e2e8d2e45f3 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 22 Jan 2019 10:08:22 +0200 Subject: RDMA: Add indication for in kernel API support to IB device Drivers that do not provide kernel verbs support should not be used by ib kernel clients at all. In case a device does not implement all mandatory verbs for kverbs usage mark it as a non kverbs provider and prevent its usage for all clients except for uverbs. The device is marked as a non kverbs provider using the 'kverbs_provider' flag which should only be set by the core code. The clients can choose whether kverbs are requested for its usage using the 'no_kverbs_req' flag which is currently set for uverbs only. This patch allows drivers to remove mandatory verbs stubs and simply set the callbacks to NULL. The IB device will be registered as a non-kverbs provider. Note that verbs that are required for the device registration process must be implemented. Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 10 ++++++---- drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_verbs.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b511cfa00bdb..9d2e108235e9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device) }; int i; + device->kverbs_provider = true; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((void *) &device->ops + mandatory_table[i].offset)) { - dev_warn(&device->dev, - "Device is missing mandatory function %s\n", - mandatory_table[i].name); - return -EINVAL; + device->kverbs_provider = false; + break; } } @@ -325,6 +324,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client { struct ib_client_data *context; + if (!device->kverbs_provider && !client->no_kverbs_req) + return -EOPNOTSUPP; + context = kmalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 996f167d1436..d628747e058c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1151,6 +1151,7 @@ static const struct file_operations uverbs_mmap_fops = { static struct ib_client uverbs_client = { .name = "uverbs", + .no_kverbs_req = true, .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1a1e710642c..4183a03b46b5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2565,6 +2565,8 @@ struct ib_device { __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; + /* Indicates kernel verbs support, should not be used in drivers */ + u16 kverbs_provider:1; u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; @@ -2619,6 +2621,9 @@ struct ib_client { const struct sockaddr *addr, void *client_data); struct list_head list; + + /* kverbs are not required by the client */ + u8 no_kverbs_req:1; }; struct ib_device *_ib_alloc_device(size_t size); -- cgit v1.2.3-55-g7522 From 0ad699c0edc97a864177679dd67f2ccd73b07cb7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:48:58 +0200 Subject: RDMA/core: Simplify restrack interface In the current implementation, we have one restrack root per-device and all users are simply providing it directly. Let's simplify the interface and have callers provide the ib_device and internally access the restrack_root. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 4 ++-- drivers/infiniband/core/nldev.c | 4 ++-- drivers/infiniband/core/restrack.c | 26 +++++++++++++++++++++----- include/rdma/restrack.h | 23 ++++------------------- 4 files changed, 29 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9d2e108235e9..919e94ff4b25 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -287,7 +287,7 @@ struct ib_device *_ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(&device->res); + rdma_restrack_init(device); device->dev.class = &ib_class; device_initialize(&device->dev); @@ -315,7 +315,7 @@ void ib_dealloc_device(struct ib_device *device) WARN_ON(!list_empty(&device->client_data_list)); WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); - rdma_restrack_clean(&device->res); + rdma_restrack_clean(device); put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 1742ff4fbf79..ee98fc9058b1 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -314,7 +314,6 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_CTX] = "ctx", }; - struct rdma_restrack_root *res = &device->res; struct nlattr *table_attr; int ret, i, curr; @@ -328,7 +327,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(res, i, task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i, + task_active_pid_ns(current)); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 46a5c553c624..0ade3da0a5c7 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -18,8 +18,14 @@ static int fill_res_noop(struct sk_buff *msg, return 0; } -void rdma_restrack_init(struct rdma_restrack_root *res) +/** + * rdma_restrack_init() - initialize resource tracking + * @dev: IB device + */ +void rdma_restrack_init(struct ib_device *dev) { + struct rdma_restrack_root *res = &dev->res; + init_rwsem(&res->rwsem); res->fill_res_entry = fill_res_noop; } @@ -38,11 +44,15 @@ static const char *type2str(enum rdma_restrack_type type) return names[type]; }; -void rdma_restrack_clean(struct rdma_restrack_root *res) +/** + * rdma_restrack_clean() - clean resource tracking + * @dev: IB device + */ +void rdma_restrack_clean(struct ib_device *dev) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; - struct ib_device *dev; const char *owner; int bkt; @@ -72,10 +82,16 @@ void rdma_restrack_clean(struct rdma_restrack_root *res) pr_err("restrack: %s", CUT_HERE); } -int rdma_restrack_count(struct rdma_restrack_root *res, - enum rdma_restrack_type type, +/** + * rdma_restrack_count() - the current usage of specific object + * @dev: IB device + * @type: actual type of object to operate + * @ns: PID namespace + */ +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; u32 cnt = 0; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 8f179be9d9a9..f756fc48eee5 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -49,6 +49,7 @@ enum rdma_restrack_type { }; #define RDMA_RESTRACK_HASH_BITS 8 +struct ib_device; struct rdma_restrack_entry; /** @@ -122,25 +123,9 @@ struct rdma_restrack_entry { bool user; }; -/** - * rdma_restrack_init() - initialize resource tracking - * @res: resource tracking root - */ -void rdma_restrack_init(struct rdma_restrack_root *res); - -/** - * rdma_restrack_clean() - clean resource tracking - * @res: resource tracking root - */ -void rdma_restrack_clean(struct rdma_restrack_root *res); - -/** - * rdma_restrack_count() - the current usage of specific object - * @res: resource entry - * @type: actual type of object to operate - * @ns: PID namespace - */ -int rdma_restrack_count(struct rdma_restrack_root *res, +void rdma_restrack_init(struct ib_device *dev); +void rdma_restrack_clean(struct ib_device *dev); +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns); -- cgit v1.2.3-55-g7522 From 02da37509705d3ba6a58fe4799a0caf6b4baecb0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:49:02 +0200 Subject: RDMA/core: Use the ops infrastructure to keep all callbacks in one place As preparation to hide rdma_restrack_root, refactor the code to use the ops structure instead of a special callback which is hidden in rdma_restrack_root. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 28 ++++++++++++++++++---------- drivers/infiniband/core/restrack.c | 7 ------- drivers/infiniband/hw/cxgb4/provider.c | 2 +- include/rdma/ib_verbs.h | 5 +++++ include/rdma/restrack.h | 7 ------- 6 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 919e94ff4b25..b9f725df4195 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1263,6 +1263,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, disassociate_ucontext); SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index ee98fc9058b1..25a248847575 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -361,11 +361,19 @@ static int fill_res_name_pid(struct sk_buff *msg, return 0; } +static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_res_entry) + return false; + return dev->ops.fill_res_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct rdma_restrack_root *resroot = &qp->device->res; + struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -415,7 +423,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -432,7 +440,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); - struct rdma_restrack_root *resroot = &id_priv->id.device->res; + struct ib_device *dev = id_priv->id.device; struct rdma_cm_id *cm_id = &id_priv->id; struct nlattr *entry_attr; @@ -474,7 +482,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -490,7 +498,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); - struct rdma_restrack_root *resroot = &cq->device->res; + struct ib_device *dev = cq->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); @@ -511,7 +519,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -527,7 +535,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); - struct rdma_restrack_root *resroot = &mr->pd->device->res; + struct ib_device *dev = mr->pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); @@ -548,7 +556,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -564,7 +572,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct rdma_restrack_root *resroot = &pd->device->res; + struct ib_device *dev = pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); @@ -591,7 +599,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index bd7770ed4174..f80b37d437ac 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -12,12 +12,6 @@ #include "cma_priv.h" -static int fill_res_noop(struct sk_buff *msg, - struct rdma_restrack_entry *entry) -{ - return 0; -} - /** * rdma_restrack_init() - initialize resource tracking * @dev: IB device @@ -27,7 +21,6 @@ void rdma_restrack_init(struct ib_device *dev) struct rdma_restrack_root *res = &dev->res; init_rwsem(&res->rwsem); - res->fill_res_entry = fill_res_noop; } static const char *type2str(enum rdma_restrack_type type) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index f977f8e7e162..cb5b713bbf39 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -549,6 +549,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, + .fill_res_entry = fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, @@ -629,7 +630,6 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; - dev->ibdev.res.fill_res_entry = fill_res_entry; memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4183a03b46b5..5fc3be884444 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2510,6 +2510,11 @@ struct ib_device_ops { */ int (*init_port)(struct ib_device *device, u8 port_num, struct kobject *port_sysfs); + /** + * Allows rdma drivers to add their own restrack attributes. + */ + int (*fill_res_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); }; struct ib_device { diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index f756fc48eee5..cc66cc7a11d3 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -65,13 +65,6 @@ struct rdma_restrack_root { * @hash: global database for all resources per-device */ DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); - /** - * @fill_res_entry: driver-specific fill function - * - * Allows rdma drivers to add their own restrack attributes. - */ - int (*fill_res_entry)(struct sk_buff *msg, - struct rdma_restrack_entry *entry); }; /** -- cgit v1.2.3-55-g7522 From c66f67414c1f88554485bb2a0abf8b5c0d741de7 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Sat, 2 Feb 2019 11:09:45 +0200 Subject: IB/core: Don't register each MAD agent for LSM notifier When creating many MAD agents in a short period of time, receive packet processing can be delayed long enough to cause timeouts while new agents are being added to the atomic notifier chain with IRQs disabled. Notifier chain registration and unregstration is an O(n) operation. With large numbers of MAD agents being created and destroyed simultaneously the CPUs spend too much time with interrupts disabled. Instead of each MAD agent registering for it's own LSM notification, maintain a list of agents internally and register once, this registration already existed for handling the PKeys. This list is write mostly, so a normal spin lock is used vs a read/write lock. All MAD agents must be checked, so a single list is used instead of breaking them down per device. Notifier calls are done under rcu_read_lock, so there isn't a risk of similar packet timeouts while checking the MAD agents security settings when notified. Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Paul Moore Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 5 ++++ drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/security.c | 50 +++++++++++++++++++++---------------- include/rdma/ib_mad.h | 3 +-- 4 files changed, 35 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index bcb3e3029a9b..d053110207eb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -202,6 +202,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, enum ib_qp_type qp_type); void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent); int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index); +void ib_mad_agent_security_change(void); #else static inline void ib_security_destroy_port_pkey_list(struct ib_device *device) { @@ -267,6 +268,10 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, { return 0; } + +static inline void ib_mad_agent_security_change(void) +{ +} #endif struct ib_device *ib_device_get_by_index(u32 ifindex); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 55221990d946..32cd35c9b21e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -452,6 +452,7 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; schedule_work(&ib_policy_change_work); + ib_mad_agent_security_change(); return NOTIFY_OK; } diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 7662e9347238..a70d2ba312ed 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -39,6 +39,10 @@ #include "core_priv.h" #include "mad_priv.h" +static LIST_HEAD(mad_agent_list); +/* Lock to protect mad_agent_list */ +static DEFINE_SPINLOCK(mad_agent_list_lock); + static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp) { struct pkey_index_qp_list *pkey = NULL; @@ -676,19 +680,18 @@ static int ib_security_pkey_access(struct ib_device *dev, return security_ib_pkey_access(sec, subnet_prefix, pkey); } -static int ib_mad_agent_security_change(struct notifier_block *nb, - unsigned long event, - void *data) +void ib_mad_agent_security_change(void) { - struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb); - - if (event != LSM_POLICY_CHANGE) - return NOTIFY_DONE; - - ag->smp_allowed = !security_ib_endport_manage_subnet( - ag->security, dev_name(&ag->device->dev), ag->port_num); - - return NOTIFY_OK; + struct ib_mad_agent *ag; + + spin_lock(&mad_agent_list_lock); + list_for_each_entry(ag, + &mad_agent_list, + mad_agent_sec_list) + WRITE_ONCE(ag->smp_allowed, + !security_ib_endport_manage_subnet(ag->security, + dev_name(&ag->device->dev), ag->port_num)); + spin_unlock(&mad_agent_list_lock); } int ib_mad_agent_security_setup(struct ib_mad_agent *agent, @@ -699,6 +702,8 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, if (!rdma_protocol_ib(agent->device, agent->port_num)) return 0; + INIT_LIST_HEAD(&agent->mad_agent_sec_list); + ret = security_ib_alloc_security(&agent->security); if (ret) return ret; @@ -706,22 +711,20 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, if (qp_type != IB_QPT_SMI) return 0; + spin_lock(&mad_agent_list_lock); ret = security_ib_endport_manage_subnet(agent->security, dev_name(&agent->device->dev), agent->port_num); if (ret) goto free_security; - agent->lsm_nb.notifier_call = ib_mad_agent_security_change; - ret = register_lsm_notifier(&agent->lsm_nb); - if (ret) - goto free_security; - - agent->smp_allowed = true; - agent->lsm_nb_reg = true; + WRITE_ONCE(agent->smp_allowed, true); + list_add(&agent->mad_agent_sec_list, &mad_agent_list); + spin_unlock(&mad_agent_list_lock); return 0; free_security: + spin_unlock(&mad_agent_list_lock); security_ib_free_security(agent->security); return ret; } @@ -731,8 +734,11 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) if (!rdma_protocol_ib(agent->device, agent->port_num)) return; - if (agent->lsm_nb_reg) - unregister_lsm_notifier(&agent->lsm_nb); + if (agent->qp->qp_type == IB_QPT_SMI) { + spin_lock(&mad_agent_list_lock); + list_del(&agent->mad_agent_sec_list); + spin_unlock(&mad_agent_list_lock); + } security_ib_free_security(agent->security); } @@ -743,7 +749,7 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) return 0; if (map->agent.qp->qp_type == IB_QPT_SMI) { - if (!map->agent.smp_allowed) + if (!READ_ONCE(map->agent.smp_allowed)) return -EACCES; return 0; } diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 1c0b914f199d..79ba8219e7dc 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -617,11 +617,10 @@ struct ib_mad_agent { u32 hi_tid; u32 flags; void *security; - struct notifier_block lsm_nb; + struct list_head mad_agent_sec_list; u8 port_num; u8 rmpp_version; bool smp_allowed; - bool lsm_nb_reg; }; /** -- cgit v1.2.3-55-g7522 From 30471d4b20335d9bd9ae9b2382a1e1e97d18d86d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 3 Feb 2019 14:55:50 +0200 Subject: RDMA/core: Share driver structure size with core Add new macros to be used in drivers while registering ops structure and IB/core while calling allocation routines, so drivers won't need to perform kzalloc/kfree in their paths. The change in allocation stage allows us to initialize common fields prior to calling to drivers (e.g. restrack). Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 ++ include/rdma/ib_verbs.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 32cd35c9b21e..d806a5c7b202 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1228,6 +1228,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) (ptr)->name = ops->name; \ } while (0) +#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) + SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2e1f1e885ee5..e29eae4aec84 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2264,6 +2264,19 @@ struct ib_counters_read_attr { struct uverbs_attr_bundle; +#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ + .size_##ib_struct = \ + (sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \ + BUILD_BUG_ON_ZERO( \ + !__same_type(((struct drv_struct *)NULL)->member, \ + struct ib_struct))) + +#define rdma_zalloc_drv_obj(ib_dev, ib_type) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL)) + +#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will -- cgit v1.2.3-55-g7522 From 21a428a019c9a6d133e745b529b9bf18c1187e70 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 3 Feb 2019 14:55:51 +0200 Subject: RDMA: Handle PD allocations by IB/core The PD allocations in IB/core allows us to simplify drivers and their error flows in their .alloc_pd() paths. The changes in .alloc_pd() go hand in had with relevant update in .dealloc_pd(). We will use this opportunity and convert .dealloc_pd() to don't fail, as it was suggested a long time ago, failures are not happening as we have never seen a WARN_ON print. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 + drivers/infiniband/core/uverbs_cmd.c | 15 ++-- drivers/infiniband/core/uverbs_std_types.c | 2 +- drivers/infiniband/core/verbs.c | 27 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 37 ++++------ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 9 ++- drivers/infiniband/hw/bnxt_re/main.c | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 25 +++---- drivers/infiniband/hw/cxgb4/provider.c | 25 +++---- drivers/infiniband/hw/hns/hns_roce_device.h | 7 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 27 +++++--- drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_pd.c | 25 +++---- drivers/infiniband/hw/i40iw/i40iw_utils.c | 1 - drivers/infiniband/hw/i40iw/i40iw_verbs.c | 32 ++++----- drivers/infiniband/hw/mlx4/main.c | 36 ++++------ drivers/infiniband/hw/mlx5/main.c | 48 +++++++------ drivers/infiniband/hw/mthca/mthca_provider.c | 29 +++----- drivers/infiniband/hw/nes/nes_verbs.c | 32 +++------ drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 92 +++++++++++-------------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 6 +- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/verbs.c | 34 +++------ drivers/infiniband/hw/qedr/verbs.h | 6 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 26 ++----- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 7 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 1 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 43 ++++-------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 7 +- drivers/infiniband/sw/rdmavt/pd.c | 29 +++----- drivers/infiniband/sw/rdmavt/pd.h | 7 +- drivers/infiniband/sw/rdmavt/vt.c | 1 + drivers/infiniband/sw/rxe/rxe_pool.c | 60 +++++++++++++--- drivers/infiniband/sw/rxe/rxe_pool.h | 4 ++ drivers/infiniband/sw/rxe/rxe_verbs.c | 16 ++--- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- include/rdma/ib_verbs.h | 9 +-- 39 files changed, 325 insertions(+), 409 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d806a5c7b202..57e1e177921e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1319,6 +1319,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_DEVICE_OP(dev_ops, unmap_fmr); + + SET_OBJ_SIZE(dev_ops, ib_pd); } EXPORT_SYMBOL(ib_set_device_ops); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index aa260cafbd85..5ac143f22df0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -407,9 +407,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); + pd = rdma_zalloc_drv_obj(ib_dev, ib_pd); + if (!pd) { + ret = -ENOMEM; goto err; } @@ -417,11 +417,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->uobject = uobj; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); + pd->res.type = RDMA_RESTRACK_PD; + + ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata); + if (ret) + goto err_alloc; uobj->object = pd; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; - pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_uadd(&pd->res); ret = uverbs_response(attrs, &resp, sizeof(resp)); @@ -432,7 +436,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) err_copy: ib_dealloc_pd(pd); - +err_alloc: + kfree(pd); err: uobj_alloc_abort(uobj); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index cbc72312eb41..f224cb727224 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject, if (ret) return ret; - ib_dealloc_pd((struct ib_pd *)uobject->object); + ib_dealloc_pd(pd); return 0; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3220fb42ecce..de5d895a5054 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, { struct ib_pd *pd; int mr_access_flags = 0; + int ret; - pd = device->ops.alloc_pd(device, NULL, NULL); - if (IS_ERR(pd)) - return pd; + pd = rdma_zalloc_drv_obj(device, ib_pd); + if (!pd) + return ERR_PTR(-ENOMEM); pd->device = device; pd->uobject = NULL; @@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, atomic_set(&pd->usecnt, 0); pd->flags = flags; + pd->res.type = RDMA_RESTRACK_PD; + rdma_restrack_set_task(&pd->res, caller); + + ret = device->ops.alloc_pd(pd, NULL, NULL); + if (ret) { + kfree(pd); + return ERR_PTR(ret); + } + rdma_restrack_kadd(&pd->res); + if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else @@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; } - pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_set_task(&pd->res, caller); - rdma_restrack_kadd(&pd->res); - if (mr_access_flags) { struct ib_mr *mr; @@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd) WARN_ON(atomic_read(&pd->usecnt)); rdma_restrack_del(&pd->res); - /* Making delalloc_pd a void return is a WIP, no driver should return - an error here. */ - ret = pd->device->ops.dealloc_pd(pd); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + pd->device->ops.dealloc_pd(pd); + kfree(pd); } EXPORT_SYMBOL(ib_dealloc_pd); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1d7469e23cde..1606571af63d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -563,41 +563,29 @@ fail: } /* Protection Domains */ -int bnxt_re_dealloc_pd(struct ib_pd *ib_pd) +void bnxt_re_dealloc_pd(struct ib_pd *ib_pd) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; - int rc; bnxt_re_destroy_fence_mr(pd); - if (pd->qplib_pd.id) { - rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res, - &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); - if (rc) - dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD"); - } - - kfree(pd); - return 0; + if (pd->qplib_pd.id) + bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, + &pd->qplib_pd); } -struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - struct ib_udata *udata) +int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_re_ucontext *ucntx = container_of(ucontext, struct bnxt_re_ucontext, ib_uctx); - struct bnxt_re_pd *pd; + struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); int rc; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - pd->rdev = rdev; if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD"); @@ -637,13 +625,12 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, if (bnxt_re_create_fence_mr(pd)) dev_warn(rdev_to_dev(rdev), "Failed to create Fence-MR\n"); - return &pd->ib_pd; + return 0; dbfail: - (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); + bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, + &pd->qplib_pd); fail: - kfree(pd); - return ERR_PTR(rc); + return rc; } /* Address Handles */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index c4af72604b4f..c7cca803cfa3 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -56,8 +56,8 @@ struct bnxt_re_fence_data { }; struct bnxt_re_pd { + struct ib_pd ib_pd; struct bnxt_re_dev *rdev; - struct ib_pd ib_pd; struct bnxt_qplib_pd qplib_pd; struct bnxt_re_fence_data fence; }; @@ -163,10 +163,9 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); -struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int bnxt_re_dealloc_pd(struct ib_pd *pd); +int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0d40a930c192..0a89ef6e5754 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -637,6 +637,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), }; static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 804c1fc7bfc1..4cc9a6ae2139 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -370,7 +370,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static int iwch_deallocate_pd(struct ib_pd *pd) +static void iwch_deallocate_pd(struct ib_pd *pd) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -379,15 +379,13 @@ static int iwch_deallocate_pd(struct ib_pd *pd) rhp = php->rhp; pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); - kfree(php); - return 0; } -static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct iwch_pd *php; + struct iwch_pd *php = to_iwch_pd(pd); + struct ib_device *ibdev = pd->device; u32 pdid; struct iwch_dev *rhp; @@ -395,12 +393,8 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, rhp = (struct iwch_dev *) ibdev; pdid = cxio_hal_get_pdid(rhp->rdev.rscp); if (!pdid) - return ERR_PTR(-EINVAL); - php = kzalloc(sizeof(*php), GFP_KERNEL); - if (!php) { - cxio_hal_put_pdid(rhp->rdev.rscp, pdid); - return ERR_PTR(-ENOMEM); - } + return -EINVAL; + php->pdid = pdid; php->rhp = rhp; if (context) { @@ -408,11 +402,11 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { iwch_deallocate_pd(&php->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return &php->ibpd; + return 0; } static int iwch_dereg_mr(struct ib_mr *ib_mr) @@ -1350,6 +1344,7 @@ static const struct ib_device_ops iwch_dev_ops = { .reg_user_mr = iwch_reg_user_mr, .req_notify_cq = iwch_arm_cq, .resize_cq = iwch_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), }; int iwch_register_device(struct iwch_dev *dev) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index f59bf7e5a589..680b5e98491d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static int c4iw_deallocate_pd(struct ib_pd *pd) +static void c4iw_deallocate_pd(struct ib_pd *pd) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -221,15 +221,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd) mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); - kfree(php); - return 0; } -static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct c4iw_pd *php; + struct c4iw_pd *php = to_c4iw_pd(pd); + struct ib_device *ibdev = pd->device; u32 pdid; struct c4iw_dev *rhp; @@ -237,12 +235,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, rhp = (struct c4iw_dev *) ibdev; pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) - return ERR_PTR(-EINVAL); - php = kzalloc(sizeof(*php), GFP_KERNEL); - if (!php) { - c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); - return ERR_PTR(-ENOMEM); - } + return -EINVAL; + php->pdid = pdid; php->rhp = rhp; if (context) { @@ -250,7 +244,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { c4iw_deallocate_pd(&php->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } mutex_lock(&rhp->rdev.stats.lock); @@ -259,7 +253,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; mutex_unlock(&rhp->rdev.stats.lock); pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php); - return &php->ibpd; + return 0; } static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, @@ -570,6 +564,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), }; void c4iw_register_device(struct work_struct *work) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 8ca8d74dfb6a..9ee86daf1700 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1114,10 +1114,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, - struct ib_ucontext *context, - struct ib_udata *udata); -int hns_roce_dealloc_pd(struct ib_pd *pd); +int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void hns_roce_dealloc_pd(struct ib_pd *pd); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index fa08c22aad66..a18b88c95995 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -711,13 +711,14 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) struct ib_qp_attr attr = { 0 }; struct hns_roce_v1_priv *priv; struct hns_roce_qp *hr_qp; + struct ib_device *ibdev; struct ib_cq *cq; struct ib_pd *pd; union ib_gid dgid; u64 subnet_prefix; int attr_mask = 0; + int ret = -ENOMEM; int i, j; - int ret; u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; u8 phy_port; u8 port = 0; @@ -742,12 +743,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_cq->ib_cq.cq_context = NULL; atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); - pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); - if (IS_ERR(pd)) { - dev_err(dev, "Create pd for reserved loop qp failed!"); - ret = -ENOMEM; + ibdev = &hr_dev->ib_dev; + pd = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (pd) + goto alloc_mem_failed; + + pd->device = ibdev; + ret = hns_roce_alloc_pd(pd, NULL, NULL); + if (ret) goto alloc_pd_failed; - } + free_mr->mr_free_pd = to_hr_pd(pd); free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; free_mr->mr_free_pd->ibpd.uobject = NULL; @@ -854,10 +859,12 @@ create_lp_qp_failed: dev_err(dev, "Destroy qp %d for mr free failed!\n", i); } - if (hns_roce_dealloc_pd(pd)) - dev_err(dev, "Destroy pd for create_lp_qp failed!\n"); + hns_roce_dealloc_pd(pd); alloc_pd_failed: + kfree(pd); + +alloc_mem_failed: if (hns_roce_ib_destroy_cq(cq)) dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); @@ -891,9 +898,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) if (ret) dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); - ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); - if (ret) - dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret); + hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); } static int hns_roce_db_init(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 67a8c4333f4f..ccf10622586c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -472,6 +472,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), }; static const struct ib_device_ops hns_roce_dev_mr_ops = { diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 4a29b2cb9bab..b9b97c5e97e6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -57,24 +57,19 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); } -struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, - struct ib_ucontext *context, - struct ib_udata *udata) +int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ib_dev = ibpd->device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); struct device *dev = hr_dev->dev; - struct hns_roce_pd *pd; + struct hns_roce_pd *pd = to_hr_pd(ibpd); int ret; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - kfree(pd); dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n"); - return ERR_PTR(ret); + return ret; } if (context) { @@ -83,21 +78,17 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); -int hns_roce_dealloc_pd(struct ib_pd *pd) +void hns_roce_dealloc_pd(struct ib_pd *pd) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); - kfree(to_hr_pd(pd)); - - return 0; } EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd); diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 59e978141ad4..c5a881172524 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -601,7 +601,6 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) if (!atomic_dec_and_test(&iwpd->usecount)) return; i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id); - kfree(iwpd); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index d4ab46dd9e6c..28449ad57b37 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -312,16 +312,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ /** * i40iw_alloc_pd - allocate protection domain - * @ibdev: device pointer from stack + * @pd: PD pointer * @context: user context created during alloc * @udata: user data */ -static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct i40iw_pd *iwpd; - struct i40iw_device *iwdev = to_iwdev(ibdev); + struct i40iw_pd *iwpd = to_iwpd(pd); + struct i40iw_device *iwdev = to_iwdev(pd->device); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_alloc_pd_resp uresp; struct i40iw_sc_pd *sc_pd; @@ -330,19 +329,13 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, int err; if (iwdev->closing) - return ERR_PTR(-ENODEV); + return -ENODEV; err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds, iwdev->max_pd, &pd_id, &iwdev->next_pd); if (err) { i40iw_pr_err("alloc resource failed\n"); - return ERR_PTR(err); - } - - iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL); - if (!iwpd) { - err = -ENOMEM; - goto free_res; + return err; } sc_pd = &iwpd->sc_pd; @@ -361,25 +354,23 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev, } i40iw_add_pdusecount(iwpd); - return &iwpd->ibpd; + return 0; + error: - kfree(iwpd); -free_res: i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id); - return ERR_PTR(err); + return err; } /** * i40iw_dealloc_pd - deallocate pd * @ibpd: ptr of pd to be deallocated */ -static int i40iw_dealloc_pd(struct ib_pd *ibpd) +static void i40iw_dealloc_pd(struct ib_pd *ibpd) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); i40iw_rem_pdusecount(iwpd, iwdev); - return 0; } /** @@ -2750,6 +2741,7 @@ static const struct ib_device_ops i40iw_dev_ops = { .query_qp = i40iw_query_qp, .reg_user_mr = i40iw_reg_user_mr, .req_notify_cq = i40iw_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd), }; /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d66002a31000..c0f6aea7ed7c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1186,38 +1186,27 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } } -static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct mlx4_ib_pd *pd; + struct mlx4_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; int err; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; - if (context) - if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { - mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); - kfree(pd); - return ERR_PTR(-EFAULT); - } - return &pd->ibpd; + if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); + return -EFAULT; + } + return 0; } -static int mlx4_ib_dealloc_pd(struct ib_pd *pd) +static void mlx4_ib_dealloc_pd(struct ib_pd *pd) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); - kfree(pd); - - return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, @@ -2580,6 +2569,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), }; static const struct ib_device_ops mlx4_ib_dev_wq_ops = { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 76d6c2557d0c..f9cddc6f2ab6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2280,30 +2280,24 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) return 0; } -static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct mlx5_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; struct mlx5_ib_alloc_pd_resp resp; - struct mlx5_ib_pd *pd; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), out, sizeof(out)); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; @@ -2311,23 +2305,19 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); - kfree(mpd); - - return 0; } enum { @@ -4680,23 +4670,28 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + ibdev = &dev->ib_dev; mutex_init(&devr->mutex); - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); - if (IS_ERR(devr->p0)) { - ret = PTR_ERR(devr->p0); - goto error0; - } - devr->p0->device = &dev->ib_dev; + devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!devr->p0) + return -ENOMEM; + + devr->p0->device = ibdev; devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); + ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + if (ret) + goto error0; + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); @@ -4794,6 +4789,7 @@ error2: error1: mlx5_ib_dealloc_pd(devr->p0); error0: + kfree(devr->p0); return ret; } @@ -4809,6 +4805,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ for (port = 0; port < dev->num_ports; ++port) @@ -5938,6 +5935,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), }; static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 1bb67562c8c8..2c754bc226f3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -374,40 +374,30 @@ static int mthca_mmap_uar(struct ib_ucontext *context, return 0; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct mthca_pd *pd; + struct ib_device *ibdev = ibpd->device; + struct mthca_pd *pd = to_mpd(ibpd); int err; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + if (err) + return err; if (context) { if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mthca_dealloc_pd(struct ib_pd *pd) +static void mthca_dealloc_pd(struct ib_pd *pd) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); - kfree(pd); - - return 0; } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, @@ -1228,6 +1218,7 @@ static const struct ib_device_ops mthca_dev_ops = { .query_qp = mthca_query_qp, .reg_user_mr = mthca_reg_user_mr, .resize_cq = mthca_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), }; static const struct ib_device_ops mthca_dev_arbel_srq_ops = { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 6eb991d40035..f18b28ae4bd9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -658,10 +658,11 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /** * nes_alloc_pd */ -static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) +static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct nes_pd *nespd; + struct ib_device *ibdev = pd->device; + struct nes_pd *nespd = to_nespd(pd); struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; @@ -676,15 +677,8 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); - if (err) { - return ERR_PTR(err); - } - - nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL); - if (!nespd) { - nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - return ERR_PTR(-ENOMEM); - } + if (err) + return err; nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", nespd, dev_name(&nesvnic->nesibdev->ibdev.dev)); @@ -700,16 +694,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - kfree(nespd); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } uresp.pd_id = nespd->pd_id; uresp.mmap_db_index = nespd->mmap_db_index; if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - kfree(nespd); - return ERR_PTR(-EFAULT); + return -EFAULT; } set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); @@ -718,14 +710,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, } nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd); - return &nespd->ibpd; + return 0; } /** * nes_dealloc_pd */ -static int nes_dealloc_pd(struct ib_pd *ibpd) +static void nes_dealloc_pd(struct ib_pd *ibpd) { struct nes_ucontext *nesucontext; struct nes_pd *nespd = to_nespd(ibpd); @@ -748,9 +740,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) nespd->pd_id, nespd); nes_free_resource(nesadapter, nesadapter->allocated_pds, (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12)); - kfree(nespd); - - return 0; } @@ -3658,6 +3647,7 @@ static const struct ib_device_ops nes_dev_ops = { .query_qp = nes_query_qp, .reg_user_mr = nes_reg_user_mr, .req_notify_cq = nes_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd), }; /** diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 88970a6bb555..0de83c92691f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -179,6 +179,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .reg_user_mr = ocrdma_reg_user_mr, .req_notify_cq = ocrdma_arm_cq, .resize_cq = ocrdma_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), }; static const struct ib_device_ops ocrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2a62936bef4d..980ba97188ff 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -367,17 +367,12 @@ static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd) return status; } -static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, - struct ocrdma_ucontext *uctx, - struct ib_udata *udata) +static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) { - struct ocrdma_pd *pd = NULL; int status; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - if (udata && uctx && dev->attr.max_dpp_pds) { pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; @@ -386,15 +381,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, dev->attr.wqe_size) : 0; } - if (dev->pd_mgr->pd_prealloc_valid) { - status = ocrdma_get_pd_num(dev, pd); - if (status == 0) { - return pd; - } else { - kfree(pd); - return ERR_PTR(status); - } - } + if (dev->pd_mgr->pd_prealloc_valid) + return ocrdma_get_pd_num(dev, pd); retry: status = ocrdma_mbx_alloc_pd(dev, pd); @@ -403,13 +391,11 @@ retry: pd->dpp_enabled = false; pd->num_dpp_qp = 0; goto retry; - } else { - kfree(pd); - return ERR_PTR(status); } + return status; } - return pd; + return 0; } static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, @@ -418,30 +404,33 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, return (uctx->cntxt_pd == pd); } -static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, +static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { - int status; - if (dev->pd_mgr->pd_prealloc_valid) - status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); + ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); else - status = ocrdma_mbx_dealloc_pd(dev, pd); - - kfree(pd); - return status; + ocrdma_mbx_dealloc_pd(dev, pd); } static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, struct ocrdma_ucontext *uctx, struct ib_udata *udata) { - int status = 0; + struct ib_device *ibdev = &dev->ibdev; + struct ib_pd *pd; + int status; + + pd = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!pd) + return -ENOMEM; + + pd->device = ibdev; + uctx->cntxt_pd = get_ocrdma_pd(pd); - uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); - if (IS_ERR(uctx->cntxt_pd)) { - status = PTR_ERR(uctx->cntxt_pd); - uctx->cntxt_pd = NULL; + status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata); + if (status) { + kfree(uctx->cntxt_pd); goto err; } @@ -460,6 +449,7 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } + kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; (void)_ocrdma_dealloc_pd(dev, pd); return 0; @@ -537,6 +527,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, return &ctx->ibucontext; cpy_err: + ocrdma_dealloc_ucontext_pd(ctx); pd_err: ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); map_err: @@ -658,10 +649,10 @@ dpp_map_err: return status; } -struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; struct ocrdma_ucontext *uctx = NULL; @@ -677,11 +668,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, } } - pd = _ocrdma_alloc_pd(dev, uctx, udata); - if (IS_ERR(pd)) { - status = PTR_ERR(pd); + pd = get_ocrdma_pd(ibpd); + status = _ocrdma_alloc_pd(dev, pd, uctx, udata); + if (status) goto exit; - } pd_mapping: if (udata && context) { @@ -689,25 +679,22 @@ pd_mapping: if (status) goto err; } - return &pd->ibpd; + return 0; err: - if (is_uctx_pd) { + if (is_uctx_pd) ocrdma_release_ucontext_pd(uctx); - } else { - if (_ocrdma_dealloc_pd(dev, pd)) - pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__); - } + else + _ocrdma_dealloc_pd(dev, pd); exit: - return ERR_PTR(status); + return status; } -int ocrdma_dealloc_pd(struct ib_pd *ibpd) +void ocrdma_dealloc_pd(struct ib_pd *ibpd) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_ucontext *uctx = NULL; - int status = 0; u64 usr_db; uctx = pd->uctx; @@ -721,11 +708,10 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) if (is_ucontext_pd(uctx, pd)) { ocrdma_release_ucontext_pd(uctx); - return status; + return; } } - status = _ocrdma_dealloc_pd(dev, pd); - return status; + _ocrdma_dealloc_pd(dev, pd); } static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b69cfdce7970..1fd66721c930 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -70,9 +70,9 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -struct ib_pd *ocrdma_alloc_pd(struct ib_device *, - struct ib_ucontext *, struct ib_udata *); -int ocrdma_dealloc_pd(struct ib_pd *pd); +int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, + struct ib_udata *udata); +void ocrdma_dealloc_pd(struct ib_pd *pd); struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 878e9e23652b..44ce4989dcef 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -239,6 +239,7 @@ static const struct ib_device_ops qedr_dev_ops = { .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, .resize_cq = qedr_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), }; static int qedr_register_device(struct qedr_dev *dev) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 989f08633fbe..a06d2258394a 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -450,11 +450,12 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_page_prot); } -struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) +int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct qedr_dev *dev = get_qedr_dev(ibdev); - struct qedr_pd *pd; + struct qedr_pd *pd = get_qedr_pd(ibpd); u16 pd_id; int rc; @@ -463,16 +464,12 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, if (!dev->rdma_ctx) { DP_ERR(dev, "invalid RDMA context\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); if (rc) - goto err; + return rc; pd->pd_id = pd_id; @@ -485,36 +482,23 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, if (rc) { DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); - goto err; + return rc; } pd->uctx = get_qedr_ucontext(context); pd->uctx->pd = pd; } - return &pd->ibpd; - -err: - kfree(pd); - return ERR_PTR(rc); + return 0; } -int qedr_dealloc_pd(struct ib_pd *ibpd) +void qedr_dealloc_pd(struct ib_pd *ibpd) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); - if (!pd) { - pr_err("Invalid PD received in dealloc_pd\n"); - return -EINVAL; - } - DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); - - kfree(pd); - - return 0; } static void qedr_free_pbl(struct qedr_dev *dev, diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 1852b7012bf4..97a6ff3f9afb 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,9 +47,9 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *); int qedr_dealloc_ucontext(struct ib_ucontext *); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -struct ib_pd *qedr_alloc_pd(struct ib_device *, - struct ib_ucontext *, struct ib_udata *); -int qedr_dealloc_pd(struct ib_pd *pd); +int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, + struct ib_udata *udata); +void qedr_dealloc_pd(struct ib_pd *pd); struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 1ec155823716..256ad2f236c8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -352,6 +352,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, + INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), }; /* Start of PF discovery section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9dea18106247..0ced89b51448 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -456,37 +456,23 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return 0; } -struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct usnic_ib_pd *pd; + struct usnic_ib_pd *pd = to_upd(ibpd); void *umem_pd; - usnic_dbg("\n"); - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); if (IS_ERR_OR_NULL(umem_pd)) { - kfree(pd); - return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM); + return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM; } - usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", - pd, context, dev_name(&ibdev->dev)); - return &pd->ibpd; + return 0; } -int usnic_ib_dealloc_pd(struct ib_pd *pd) +void usnic_ib_dealloc_pd(struct ib_pd *pd) { - usnic_info("freeing domain 0x%p\n", pd); - usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); - kfree(pd); - return 0; } struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 99a6d81c2bcd..44a9d2f82bf5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -51,10 +51,9 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); -struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int usnic_ib_dealloc_pd(struct ib_pd *pd); +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata); +void usnic_ib_dealloc_pd(struct ib_pd *pd); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e582beaf9430..47e653d2495c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -195,6 +195,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), }; static const struct ib_device_ops pvrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index fafb2add3b44..f44220f72e05 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -438,37 +438,29 @@ int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) /** * pvrdma_alloc_pd - allocate protection domain - * @ibdev: the IB device + * @ibpd: PD pointer * @context: user context * @udata: user data * * @return: the ib_pd protection domain pointer on success, otherwise errno. */ -struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct pvrdma_pd *pd; + struct ib_device *ibdev = ibpd->device; + struct pvrdma_pd *pd = to_vpd(ibpd); struct pvrdma_dev *dev = to_vdev(ibdev); - union pvrdma_cmd_req req; - union pvrdma_cmd_resp rsp; + union pvrdma_cmd_req req = {}; + union pvrdma_cmd_resp rsp = {}; struct pvrdma_cmd_create_pd *cmd = &req.create_pd; struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; - void *ptr; /* Check allowed max pds */ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ptr = ERR_PTR(-ENOMEM); - goto err; - } - - memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); @@ -476,8 +468,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, dev_warn(&dev->pdev->dev, "failed to allocate protection domain, error: %d\n", ret); - ptr = ERR_PTR(ret); - goto freepd; + goto err; } pd->privileged = !context; @@ -490,18 +481,16 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); pvrdma_dealloc_pd(&pd->ibpd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } /* u32 pd handle */ - return &pd->ibpd; + return 0; -freepd: - kfree(pd); err: atomic_dec(&dev->num_pds); - return ptr; + return ret; } /** @@ -510,14 +499,13 @@ err: * * @return: 0 on success, otherwise errno. */ -int pvrdma_dealloc_pd(struct ib_pd *pd) +void pvrdma_dealloc_pd(struct ib_pd *pd) { struct pvrdma_dev *dev = to_vdev(pd->device); - union pvrdma_cmd_req req; + union pvrdma_cmd_req req = {}; struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd; int ret; - memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD; cmd->pd_handle = to_vpd(pd)->pd_handle; @@ -527,10 +515,7 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) "could not dealloc protection domain, error: %d\n", ret); - kfree(to_vpd(pd)); atomic_dec(&dev->num_pds); - - return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index f7f758d60110..ed91baad1ffa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -399,10 +399,9 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata); int pvrdma_dealloc_ucontext(struct ib_ucontext *context); -struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int pvrdma_dealloc_pd(struct ib_pd *ibpd); +int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void pvrdma_dealloc_pd(struct ib_pd *ibpd); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index dcc1870b8d23..6033054b22fa 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -50,7 +50,7 @@ /** * rvt_alloc_pd - allocate a protection domain - * @ibdev: ib device + * @ibpd: PD * @context: optional user context * @udata: optional user data * @@ -58,19 +58,14 @@ * * Return: 0 on success */ -struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { + struct ib_device *ibdev = ibpd->device; struct rvt_dev_info *dev = ib_to_rvt(ibdev); - struct rvt_pd *pd; - struct ib_pd *ret; + struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); + int ret = 0; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } /* * While we could continue allocating protecetion domains, being * constrained only by system resources. The IBTA spec defines that @@ -81,8 +76,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, spin_lock(&dev->n_pds_lock); if (dev->n_pds_allocated == dev->dparms.props.max_pd) { spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail; } @@ -92,8 +86,6 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, /* ib_alloc_pd() will initialize pd->ibpd. */ pd->user = !!udata; - ret = &pd->ibpd; - bail: return ret; } @@ -104,16 +96,11 @@ bail: * * Return: always 0 */ -int rvt_dealloc_pd(struct ib_pd *ibpd) +void rvt_dealloc_pd(struct ib_pd *ibpd) { - struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; } diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 1892ca4a9746..7a887e4a45e7 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -50,9 +50,8 @@ #include -struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int rvt_dealloc_pd(struct ib_pd *ibpd); +int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); +void rvt_dealloc_pd(struct ib_pd *ibpd); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index b3f0c5578925..a19832c73d5a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -436,6 +436,7 @@ static const struct ib_device_ops rvt_dev_ops = { .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, .unmap_fmr = rvt_unmap_fmr, + INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), }; static noinline int check_support(struct rvt_dev_info *rdi, int verb) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b5c91df22047..cd3f14629ba8 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -46,6 +46,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_PD] = { .name = "rxe-pd", .size = sizeof(struct rxe_pd), + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_AH] = { .name = "rxe-ah", @@ -119,8 +120,10 @@ static void rxe_cache_clean(size_t cnt) for (i = 0; i < cnt; i++) { type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; + if (!(type->flags & RXE_POOL_NO_ALLOC)) { + kmem_cache_destroy(type->cache); + type->cache = NULL; + } } } @@ -134,14 +137,17 @@ int rxe_cache_init(void) for (i = 0; i < RXE_NUM_TYPES; i++) { type = &rxe_type_info[i]; size = ALIGN(type->size, RXE_POOL_ALIGN); - type->cache = kmem_cache_create(type->name, size, - RXE_POOL_ALIGN, - RXE_POOL_CACHE_FLAGS, NULL); - if (!type->cache) { - pr_err("Unable to init kmem cache for %s\n", - type->name); - err = -ENOMEM; - goto err1; + if (!(type->flags & RXE_POOL_NO_ALLOC)) { + type->cache = + kmem_cache_create(type->name, size, + RXE_POOL_ALIGN, + RXE_POOL_CACHE_FLAGS, NULL); + if (!type->cache) { + pr_err("Unable to init kmem cache for %s\n", + type->name); + err = -ENOMEM; + goto err1; + } } } @@ -415,6 +421,37 @@ out_put_pool: return NULL; } +int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +{ + unsigned long flags; + + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + + read_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != RXE_POOL_STATE_VALID) { + read_unlock_irqrestore(&pool->pool_lock, flags); + return -EINVAL; + } + kref_get(&pool->ref_cnt); + read_unlock_irqrestore(&pool->pool_lock, flags); + + kref_get(&pool->rxe->ref_cnt); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_put_pool; + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return 0; + +out_put_pool: + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return -EINVAL; +} + void rxe_elem_release(struct kref *kref) { struct rxe_pool_entry *elem = @@ -424,7 +461,8 @@ void rxe_elem_release(struct kref *kref) if (pool->cleanup) pool->cleanup(elem); - kmem_cache_free(pool_cache(pool), elem); + if (!(pool->flags & RXE_POOL_NO_ALLOC)) + kmem_cache_free(pool_cache(pool), elem); atomic_dec(&pool->num_elem); rxe_dev_put(pool->rxe); rxe_pool_put(pool); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 72968c29e01f..2f2cff1cbe43 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -41,6 +41,7 @@ enum rxe_pool_flags { RXE_POOL_ATOMIC = BIT(0), RXE_POOL_INDEX = BIT(1), RXE_POOL_KEY = BIT(2), + RXE_POOL_NO_ALLOC = BIT(4), }; enum rxe_elem_type { @@ -131,6 +132,9 @@ void rxe_pool_cleanup(struct rxe_pool *pool); /* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); +/* connect already allocated object to pool */ +int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); + /* assign an index to an indexed object and insert object into * pool's rb tree */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index cc5a05124ece..051c3930e808 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -191,23 +191,20 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num, return 0; } -static struct ib_pd *rxe_alloc_pd(struct ib_device *dev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, + struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(dev); - struct rxe_pd *pd; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); - pd = rxe_alloc(&rxe->pd_pool); - return pd ? &pd->ibpd : ERR_PTR(-ENOMEM); + return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } -static int rxe_dealloc_pd(struct ib_pd *ibpd) +static void rxe_dealloc_pd(struct ib_pd *ibpd) { struct rxe_pd *pd = to_rpd(ibpd); rxe_drop_ref(pd); - return 0; } static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, @@ -1183,6 +1180,7 @@ static const struct ib_device_ops rxe_dev_ops = { .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, + INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), }; int rxe_register_device(struct rxe_dev *rxe) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 74e04801d34d..70839d3f55d9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -66,8 +66,8 @@ struct rxe_ucontext { }; struct rxe_pd { + struct ib_pd ibpd; struct rxe_pool_entry pelem; - struct ib_pd ibpd; }; struct rxe_ah { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e29eae4aec84..854d7816787c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2385,10 +2385,9 @@ struct ib_device_ops { int (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - struct ib_pd *(*alloc_pd)(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - int (*dealloc_pd)(struct ib_pd *pd); + int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, + struct ib_udata *udata); + void (*dealloc_pd)(struct ib_pd *pd); struct ib_ah *(*create_ah)(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); @@ -2530,6 +2529,8 @@ struct ib_device_ops { */ int (*fill_res_entry)(struct sk_buff *msg, struct rdma_restrack_entry *entry); + + DECLARE_RDMA_OBJ_SIZE(ib_pd); }; struct ib_device { -- cgit v1.2.3-55-g7522 From e3593b568a68b0e1a434b80fd6eaebfb655e839d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:47 -0700 Subject: RDMA/device: Check that the rename is nop under the lock Since another rename could be running in parallel it is safer to check that the name is not changing inside the lock, where we already know the device name will not change. Fixes: d21943dd19b5 ("RDMA/core: Implement IB device rename function") Signed-off-by: Jason Gunthorpe Reviewed-by: Parav Pandit --- drivers/infiniband/core/device.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 57e1e177921e..60083bde3e39 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -189,12 +189,14 @@ static struct ib_device *__ib_device_get_by_name(const char *name) int ib_device_rename(struct ib_device *ibdev, const char *name) { - int ret = 0; - - if (!strcmp(name, dev_name(&ibdev->dev))) - return ret; + int ret; mutex_lock(&device_mutex); + if (!strcmp(name, dev_name(&ibdev->dev))) { + ret = 0; + goto out; + } + if (__ib_device_get_by_name(name)) { ret = -EEXIST; goto out; -- cgit v1.2.3-55-g7522 From b34b269ad85d7dd4a512487f2395c3be3e40f76a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:48 -0700 Subject: RDMA/device: Ensure that security memory is always freed Since this only frees memory it should be done during the release callback. Otherwise there are possible error flows where it might not get called if registration aborts. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 ++-- drivers/infiniband/core/device.c | 10 +++------- drivers/infiniband/core/security.c | 4 +--- 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index d053110207eb..a1826f4c2e23 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -181,7 +181,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, u64 *sn_pfx); #ifdef CONFIG_SECURITY_INFINIBAND -void ib_security_destroy_port_pkey_list(struct ib_device *device); +void ib_security_release_port_pkey_list(struct ib_device *device); void ib_security_cache_change(struct ib_device *device, u8 port_num, @@ -204,7 +204,7 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent); int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index); void ib_mad_agent_security_change(void); #else -static inline void ib_security_destroy_port_pkey_list(struct ib_device *device) +static inline void ib_security_release_port_pkey_list(struct ib_device *device) { } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 60083bde3e39..b997feac2c63 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -253,6 +253,8 @@ static void ib_device_release(struct device *device) ib_cache_release_one(dev); kfree(dev->port_immutable); } + ib_security_release_port_pkey_list(dev); + kfree(dev->port_pkey_list); kfree(dev); } @@ -522,7 +524,6 @@ static void cleanup_device(struct ib_device *device) { ib_cache_cleanup_one(device); ib_cache_release_one(device); - kfree(device->port_pkey_list); kfree(device->port_immutable); } @@ -560,12 +561,10 @@ static int setup_device(struct ib_device *device) if (ret) { dev_warn(&device->dev, "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto pkey_cleanup; + return ret; } return 0; -pkey_cleanup: - kfree(device->port_pkey_list); port_cleanup: kfree(device->port_immutable); return ret; @@ -682,9 +681,6 @@ void ib_unregister_device(struct ib_device *device) ib_cache_cleanup_one(device); - ib_security_destroy_port_pkey_list(device); - kfree(device->port_pkey_list); - down_write(&lists_rwsem); write_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a70d2ba312ed..dad6a94a43f3 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -558,13 +558,12 @@ void ib_security_cache_change(struct ib_device *device, } } -void ib_security_destroy_port_pkey_list(struct ib_device *device) +void ib_security_release_port_pkey_list(struct ib_device *device) { struct pkey_index_qp_list *pkey, *tmp_pkey; int i; for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { - spin_lock(&device->port_pkey_list[i].list_lock); list_for_each_entry_safe(pkey, tmp_pkey, &device->port_pkey_list[i].pkey_list, @@ -572,7 +571,6 @@ void ib_security_destroy_port_pkey_list(struct ib_device *device) list_del(&pkey->pkey_index_list); kfree(pkey); } - spin_unlock(&device->port_pkey_list[i].list_lock); } } -- cgit v1.2.3-55-g7522 From d45f89d59bcd42d6b8575d0af69d7a3a98e73bb6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:49 -0700 Subject: RDMA/device: Call ib_cache_release_one() only from ib_device_release() Instead of complicated logic about when this memory is freed, always free it during device release(). All the cache pointers start out as NULL, so it is safe to call this before the cache is initialized. This makes for a simpler error unwind flow, and a simpler understanding of the lifetime of the memory allocations inside the struct ib_device. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 3 +++ drivers/infiniband/core/device.c | 41 ++++++++++++---------------------------- 2 files changed, 15 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7b04590f307f..2338d0b3a0ca 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1460,6 +1460,9 @@ void ib_cache_release_one(struct ib_device *device) { int p; + if (!device->cache.ports) + return; + /* * The release function frees all the cache elements. * This function should be called as part of freeing diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b997feac2c63..872662a84b16 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -244,17 +244,10 @@ static void ib_device_release(struct device *device) struct ib_device *dev = container_of(device, struct ib_device, dev); WARN_ON(dev->reg_state == IB_DEV_REGISTERED); - if (dev->reg_state == IB_DEV_UNREGISTERED) { - /* - * In IB_DEV_UNINITIALIZED state, cache or port table - * is not even created. Free cache and port table only when - * device reaches UNREGISTERED state. - */ - ib_cache_release_one(dev); - kfree(dev->port_immutable); - } + ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); kfree(dev->port_pkey_list); + kfree(dev->port_immutable); kfree(dev); } @@ -520,13 +513,6 @@ static void setup_dma_device(struct ib_device *device) } } -static void cleanup_device(struct ib_device *device) -{ - ib_cache_cleanup_one(device); - ib_cache_release_one(device); - kfree(device->port_immutable); -} - static int setup_device(struct ib_device *device) { struct ib_udata uhw = {.outlen = 0, .inlen = 0}; @@ -548,26 +534,16 @@ static int setup_device(struct ib_device *device) if (ret) { dev_warn(&device->dev, "Couldn't query the device attributes\n"); - goto port_cleanup; + return ret; } ret = setup_port_pkey_list(device); if (ret) { dev_warn(&device->dev, "Couldn't create per port_pkey_list\n"); - goto port_cleanup; - } - - ret = ib_cache_setup_one(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't set up InfiniBand P_Key/GID cache\n"); return ret; } - return 0; -port_cleanup: - kfree(device->port_immutable); - return ret; + return 0; } /** @@ -607,6 +583,13 @@ int ib_register_device(struct ib_device *device, const char *name) if (ret) goto out; + ret = ib_cache_setup_one(device); + if (ret) { + dev_warn(&device->dev, + "Couldn't set up InfiniBand P_Key/GID cache\n"); + goto out; + } + device->index = __dev_new_index(); ib_device_register_rdmacg(device); @@ -633,7 +616,7 @@ int ib_register_device(struct ib_device *device, const char *name) cg_cleanup: ib_device_unregister_rdmacg(device); - cleanup_device(device); + ib_cache_cleanup_one(device); out: mutex_unlock(&device_mutex); return ret; -- cgit v1.2.3-55-g7522 From 652432f33c01b2edaa5b2550b423cd894b1c7b9a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:50 -0700 Subject: RDMA/device: Get rid of reg_state This really has no purpose anymore, refcount can be used to tell if the device is still registered. Keeping it around just invites mis-use. Signed-off-by: Jason Gunthorpe Reviewed-by: Parav Pandit --- drivers/infiniband/core/device.c | 8 ++------ include/rdma/ib_verbs.h | 6 ------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 872662a84b16..1c54ded776d0 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -243,7 +243,7 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); - WARN_ON(dev->reg_state == IB_DEV_REGISTERED); + WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); kfree(dev->port_pkey_list); @@ -316,8 +316,7 @@ EXPORT_SYMBOL(_ib_alloc_device); void ib_dealloc_device(struct ib_device *device) { WARN_ON(!list_empty(&device->client_data_list)); - WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && - device->reg_state != IB_DEV_UNINITIALIZED); + WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); put_device(&device->dev); } @@ -602,7 +601,6 @@ int ib_register_device(struct ib_device *device, const char *name) } refcount_set(&device->refcount, 1); - device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) if (!add_client_context(device, client) && client->add) @@ -673,8 +671,6 @@ void ib_unregister_device(struct ib_device *device) } write_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); - - device->reg_state = IB_DEV_UNREGISTERED; } EXPORT_SYMBOL(ib_unregister_device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 854d7816787c..d8ba987e8b29 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2572,12 +2572,6 @@ struct ib_device { struct kobject *ports_kobj; struct list_head port_list; - enum { - IB_DEV_UNINITIALIZED, - IB_DEV_REGISTERED, - IB_DEV_UNREGISTERED - } reg_state; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; -- cgit v1.2.3-55-g7522 From 3b88afd38e88d1bb2e900204ff0af7301a379a09 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:51 -0700 Subject: RDMA/device: Use an ida instead of a free page in alloc_name ida is the proper data structure to hold list of clustered small integers and then allocate an unused integer. Get rid of the convoluted and limited open-coded bitmap. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1c54ded776d0..3a80f96c2919 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -213,30 +213,36 @@ out: static int alloc_name(struct ib_device *ibdev, const char *name) { - unsigned long *inuse; struct ib_device *device; + struct ida inuse; + int rc; int i; - inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!inuse) - return -ENOMEM; - + ida_init(&inuse); list_for_each_entry(device, &device_list, core_list) { char buf[IB_DEVICE_NAME_MAX]; if (sscanf(dev_name(&device->dev), name, &i) != 1) continue; - if (i < 0 || i >= PAGE_SIZE * 8) + if (i < 0 || i >= INT_MAX) continue; snprintf(buf, sizeof buf, name, i); - if (!strcmp(buf, dev_name(&device->dev))) - set_bit(i, inuse); + if (strcmp(buf, dev_name(&device->dev)) != 0) + continue; + + rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL); + if (rc < 0) + goto out; } - i = find_first_zero_bit(inuse, PAGE_SIZE * 8); - free_page((unsigned long) inuse); + rc = ida_alloc(&inuse, GFP_KERNEL); + if (rc < 0) + goto out; - return dev_set_name(&ibdev->dev, name, i); + rc = dev_set_name(&ibdev->dev, name, rc); +out: + ida_destroy(&inuse); + return rc; } static void ib_device_release(struct device *device) -- cgit v1.2.3-55-g7522 From e59178d895afa29b671323f8265a1e50afe989e5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:52 -0700 Subject: RDMA/devices: Use xarray to store the clients This gives each client a unique ID and will let us move client_data to use xarray, and revise the locking scheme. clients have to be add/removed in strict FIFO/LIFO order as they interdepend. To support this the client_ids are assigned to increase in FIFO order. The existing linked list is kept to support reverse iteration until xarray can get a reverse iteration API. Signed-off-by: Jason Gunthorpe Reviewed-by: Parav Pandit --- drivers/infiniband/core/device.c | 50 ++++++++++++++++++++++++++++++++++++---- include/rdma/ib_verbs.h | 3 ++- 2 files changed, 47 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3a80f96c2919..f87d85659359 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -65,15 +65,17 @@ struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); -/* The device_list and client_list contain devices and clients after their +/* The device_list and clients contain devices and clients after their * registration has completed, and the devices and clients are removed * during unregistration. */ static LIST_HEAD(device_list); static LIST_HEAD(client_list); +#define CLIENT_REGISTERED XA_MARK_1 +static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); /* * device_mutex and lists_rwsem protect access to both device_list and - * client_list. device_mutex protects writer access by device and client + * clients. device_mutex protects writer access by device and client * registration / de-registration. lists_rwsem protects reader access to * these lists. Iterators of these lists must lock it for read, while updates * to the lists must be done with a write lock. A special case is when the @@ -564,6 +566,7 @@ int ib_register_device(struct ib_device *device, const char *name) { int ret; struct ib_client *client; + unsigned long index; setup_dma_device(device); @@ -608,7 +611,7 @@ int ib_register_device(struct ib_device *device, const char *name) refcount_set(&device->refcount, 1); - list_for_each_entry(client, &client_list, list) + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) if (!add_client_context(device, client) && client->add) client->add(device); @@ -680,6 +683,32 @@ void ib_unregister_device(struct ib_device *device) } EXPORT_SYMBOL(ib_unregister_device); +static int assign_client_id(struct ib_client *client) +{ + int ret; + + /* + * The add/remove callbacks must be called in FIFO/LIFO order. To + * achieve this we assign client_ids so they are sorted in + * registration order, and retain a linked list we can reverse iterate + * to get the LIFO order. The extra linked list can go away if xarray + * learns to reverse iterate. + */ + if (list_empty(&client_list)) + client->client_id = 0; + else + client->client_id = + list_last_entry(&client_list, struct ib_client, list) + ->client_id; + ret = xa_alloc(&clients, &client->client_id, INT_MAX, client, + GFP_KERNEL); + if (ret) + goto out; + +out: + return ret; +} + /** * ib_register_client - Register an IB client * @client:Client to register @@ -696,15 +725,21 @@ EXPORT_SYMBOL(ib_unregister_device); int ib_register_client(struct ib_client *client) { struct ib_device *device; + int ret; mutex_lock(&device_mutex); + ret = assign_client_id(client); + if (ret) { + mutex_unlock(&device_mutex); + return ret; + } list_for_each_entry(device, &device_list, core_list) if (!add_client_context(device, client) && client->add) client->add(device); down_write(&lists_rwsem); - list_add_tail(&client->list, &client_list); + xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); up_write(&lists_rwsem); mutex_unlock(&device_mutex); @@ -729,7 +764,7 @@ void ib_unregister_client(struct ib_client *client) mutex_lock(&device_mutex); down_write(&lists_rwsem); - list_del(&client->list); + xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); up_write(&lists_rwsem); list_for_each_entry(device, &device_list, core_list) { @@ -765,6 +800,10 @@ void ib_unregister_client(struct ib_client *client) kfree(found_context); } + down_write(&lists_rwsem); + list_del(&client->list); + xa_erase(&clients, client->client_id); + up_write(&lists_rwsem); mutex_unlock(&device_mutex); } EXPORT_SYMBOL(ib_unregister_client); @@ -1422,6 +1461,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); + WARN_ON(!xa_empty(&clients)); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d8ba987e8b29..cc15820513cd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2610,7 +2610,7 @@ struct ib_device { }; struct ib_client { - char *name; + const char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *, void *client_data); @@ -2637,6 +2637,7 @@ struct ib_client { const struct sockaddr *addr, void *client_data); struct list_head list; + u32 client_id; /* kverbs are not required by the client */ u8 no_kverbs_req:1; -- cgit v1.2.3-55-g7522 From 0df91bb67334eebaf73d4ba32567e16d55f4f116 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:53 -0700 Subject: RDMA/devices: Use xarray to store the client_data Now that we have a small ID for each client we can use xarray instead of linearly searching linked lists for client data. This will give much faster and scalable client data lookup, and will lets us revise the locking scheme. Since xarray can store 'going_down' using a mark just entirely eliminate the struct ib_client_data and directly store the client_data value in the xarray. However this does require a special iterator as we must still iterate over any NULL client_data values. Also eliminate the client_data_lock in favour of internal xarray locking. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 348 +++++++++++++++++++-------------------- include/rdma/ib_verbs.h | 23 ++- 2 files changed, 186 insertions(+), 185 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f87d85659359..5096593b99e9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -51,30 +51,72 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); MODULE_LICENSE("Dual BSD/GPL"); -struct ib_client_data { - struct list_head list; - struct ib_client *client; - void * data; - /* The device or client is going down. Do not call client or device - * callbacks other than remove(). */ - bool going_down; -}; - struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); -/* The device_list and clients contain devices and clients after their - * registration has completed, and the devices and clients are removed - * during unregistration. */ -static LIST_HEAD(device_list); +/* + * devices contains devices that have had their names assigned. The + * devices may not be registered. Users that care about the registration + * status need to call ib_device_try_get() on the device to ensure it is + * registered, and keep it registered, for the required duration. + * + */ +static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); + +/* + * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the + * object is guaranteed to be and remain registered for the duration of the + * lock. + */ +#define DEVICE_REGISTERED XA_MARK_1 + static LIST_HEAD(client_list); #define CLIENT_REGISTERED XA_MARK_1 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); /* - * device_mutex and lists_rwsem protect access to both device_list and + * If client_data is registered then the corresponding client must also still + * be registered. + */ +#define CLIENT_DATA_REGISTERED XA_MARK_1 +/* + * xarray has this behavior where it won't iterate over NULL values stored in + * allocated arrays. So we need our own iterator to see all values stored in + * the array. This does the same thing as xa_for_each except that it also + * returns NULL valued entries if the array is allocating. Simplified to only + * work on simple xarrays. + */ +static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, + xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp); + void *entry; + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, filter); + if (xa_is_zero(entry)) + break; + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + if (entry) { + *indexp = xas.xa_index; + if (xa_is_zero(entry)) + return NULL; + return entry; + } + return XA_ERROR(-ENOENT); +} +#define xan_for_each_marked(xa, index, entry, filter) \ + for (index = 0, entry = xan_find_marked(xa, &(index), filter); \ + !xa_is_err(entry); \ + (index)++, entry = xan_find_marked(xa, &(index), filter)) + +/* + * device_mutex and lists_rwsem protect access to both devices and * clients. device_mutex protects writer access by device and client * registration / de-registration. lists_rwsem protects reader access to * these lists. Iterators of these lists must lock it for read, while updates @@ -135,17 +177,6 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } -static struct ib_device *__ib_device_get_by_index(u32 index) -{ - struct ib_device *device; - - list_for_each_entry(device, &device_list, core_list) - if (device->index == index) - return device; - - return NULL; -} - /* * Caller must perform ib_device_put() to return the device reference count * when ib_device_get_by_index() returns valid device pointer. @@ -155,7 +186,7 @@ struct ib_device *ib_device_get_by_index(u32 index) struct ib_device *device; down_read(&lists_rwsem); - device = __ib_device_get_by_index(index); + device = xa_load(&devices, index); if (device) { if (!ib_device_try_get(device)) device = NULL; @@ -181,8 +212,9 @@ EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; + unsigned long index; - list_for_each_entry(device, &device_list, core_list) + xa_for_each (&devices, index, device) if (!strcmp(name, dev_name(&device->dev))) return device; @@ -216,12 +248,13 @@ out: static int alloc_name(struct ib_device *ibdev, const char *name) { struct ib_device *device; + unsigned long index; struct ida inuse; int rc; int i; ida_init(&inuse); - list_for_each_entry(device, &device_list, core_list) { + xa_for_each (&devices, index, device) { char buf[IB_DEVICE_NAME_MAX]; if (sscanf(dev_name(&device->dev), name, &i) != 1) @@ -256,6 +289,7 @@ static void ib_device_release(struct device *device) ib_security_release_port_pkey_list(dev); kfree(dev->port_pkey_list); kfree(dev->port_immutable); + xa_destroy(&dev->client_data); kfree(dev); } @@ -306,8 +340,11 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); - rwlock_init(&device->client_data_lock); - INIT_LIST_HEAD(&device->client_data_list); + /* + * client_data needs to be alloc because we don't want our mark to be + * destroyed if the user stores NULL in the client data. + */ + xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); INIT_LIST_HEAD(&device->port_list); init_completion(&device->unreg_completion); @@ -323,7 +360,7 @@ EXPORT_SYMBOL(_ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { - WARN_ON(!list_empty(&device->client_data_list)); + WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); put_device(&device->dev); @@ -332,26 +369,20 @@ EXPORT_SYMBOL(ib_dealloc_device); static int add_client_context(struct ib_device *device, struct ib_client *client) { - struct ib_client_data *context; + void *entry; if (!device->kverbs_provider && !client->no_kverbs_req) return -EOPNOTSUPP; - context = kmalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - context->client = client; - context->data = NULL; - context->going_down = false; - down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_add(&context->list, &device->client_data_list); - write_unlock_irq(&device->client_data_lock); + entry = xa_store(&device->client_data, client->client_id, NULL, + GFP_KERNEL); + if (!xa_is_err(entry)) + xa_set_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED); up_write(&lists_rwsem); - return 0; + return xa_err(entry); } static int verify_immutable(const struct ib_device *dev, u8 port) @@ -428,9 +459,10 @@ static int setup_port_pkey_list(struct ib_device *device) static void ib_policy_change_task(struct work_struct *work) { struct ib_device *dev; + unsigned long index; down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) { + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { int i; for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) { @@ -461,28 +493,48 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_OK; } -/** - * __dev_new_index - allocate an device index - * - * Returns a suitable unique value for a new device interface - * number. It assumes that there are less than 2^32-1 ib devices - * will be present in the system. +/* + * Assign the unique string device name and the unique device index. */ -static u32 __dev_new_index(void) +static int assign_name(struct ib_device *device, const char *name) { - /* - * The device index to allow stable naming. - * Similar to struct net -> ifindex. - */ - static u32 index; + static u32 last_id; + int ret; - for (;;) { - if (!(++index)) - index = 1; + /* Assign a unique name to the device */ + if (strchr(name, '%')) + ret = alloc_name(device, name); + else + ret = dev_set_name(&device->dev, name); + if (ret) + goto out; + + if (__ib_device_get_by_name(dev_name(&device->dev))) { + ret = -ENFILE; + goto out; + } + strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); - if (!__ib_device_get_by_index(index)) - return index; + /* Cyclically allocate a user visible ID for the device */ + device->index = last_id; + ret = xa_alloc(&devices, &device->index, INT_MAX, device, GFP_KERNEL); + if (ret == -ENOSPC) { + device->index = 0; + ret = xa_alloc(&devices, &device->index, INT_MAX, device, + GFP_KERNEL); } + if (ret) + goto out; + last_id = device->index + 1; + + ret = 0; +out: + return ret; +} + +static void release_name(struct ib_device *device) +{ + xa_erase(&devices, device->index); } static void setup_dma_device(struct ib_device *device) @@ -572,34 +624,21 @@ int ib_register_device(struct ib_device *device, const char *name) mutex_lock(&device_mutex); - if (strchr(name, '%')) { - ret = alloc_name(device, name); - if (ret) - goto out; - } else { - ret = dev_set_name(&device->dev, name); - if (ret) - goto out; - } - if (__ib_device_get_by_name(dev_name(&device->dev))) { - ret = -ENFILE; + ret = assign_name(device, name); + if (ret) goto out; - } - strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); ret = setup_device(device); if (ret) - goto out; + goto out_name; ret = ib_cache_setup_one(device); if (ret) { dev_warn(&device->dev, "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out; + goto out_name; } - device->index = __dev_new_index(); - ib_device_register_rdmacg(device); ret = ib_device_register_sysfs(device); @@ -616,7 +655,7 @@ int ib_register_device(struct ib_device *device, const char *name) client->add(device); down_write(&lists_rwsem); - list_add_tail(&device->core_list, &device_list); + xa_set_mark(&devices, device->index, DEVICE_REGISTERED); up_write(&lists_rwsem); mutex_unlock(&device_mutex); return 0; @@ -624,6 +663,8 @@ int ib_register_device(struct ib_device *device, const char *name) cg_cleanup: ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); +out_name: + release_name(device); out: mutex_unlock(&device_mutex); return ret; @@ -638,8 +679,8 @@ EXPORT_SYMBOL(ib_register_device); */ void ib_unregister_device(struct ib_device *device) { - struct ib_client_data *context, *tmp; - unsigned long flags; + struct ib_client *client; + unsigned long index; /* * Wait for all netlink command callers to finish working on the @@ -651,34 +692,31 @@ void ib_unregister_device(struct ib_device *device) mutex_lock(&device_mutex); down_write(&lists_rwsem); - list_del(&device->core_list); - write_lock_irq(&device->client_data_lock); - list_for_each_entry(context, &device->client_data_list, list) - context->going_down = true; - write_unlock_irq(&device->client_data_lock); + xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); + xa_for_each (&clients, index, client) + xa_clear_mark(&device->client_data, index, + CLIENT_DATA_REGISTERED); downgrade_write(&lists_rwsem); - list_for_each_entry(context, &device->client_data_list, list) { - if (context->client->remove) - context->client->remove(device, context->data); - } + list_for_each_entry_reverse(client, &client_list, list) + if (xa_get_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED) && + client->remove) + client->remove(device, xa_load(&device->client_data, + client->client_id)); up_read(&lists_rwsem); ib_device_unregister_sysfs(device); ib_device_unregister_rdmacg(device); + release_name(device); + mutex_unlock(&device_mutex); ib_cache_cleanup_one(device); down_write(&lists_rwsem); - write_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, - list) { - list_del(&context->list); - kfree(context); - } - write_unlock_irqrestore(&device->client_data_lock, flags); + xa_destroy(&device->client_data); up_write(&lists_rwsem); } EXPORT_SYMBOL(ib_unregister_device); @@ -725,6 +763,7 @@ out: int ib_register_client(struct ib_client *client) { struct ib_device *device; + unsigned long index; int ret; mutex_lock(&device_mutex); @@ -734,7 +773,7 @@ int ib_register_client(struct ib_client *client) return ret; } - list_for_each_entry(device, &device_list, core_list) + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) if (!add_client_context(device, client) && client->add) client->add(device); @@ -758,8 +797,8 @@ EXPORT_SYMBOL(ib_register_client); */ void ib_unregister_client(struct ib_client *client) { - struct ib_client_data *context; struct ib_device *device; + unsigned long index; mutex_lock(&device_mutex); @@ -767,37 +806,19 @@ void ib_unregister_client(struct ib_client *client) xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); up_write(&lists_rwsem); - list_for_each_entry(device, &device_list, core_list) { - struct ib_client_data *found_context = NULL; - + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - context->going_down = true; - found_context = context; - break; - } - write_unlock_irq(&device->client_data_lock); + xa_clear_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED); up_write(&lists_rwsem); if (client->remove) - client->remove(device, found_context ? - found_context->data : NULL); - - if (!found_context) { - dev_warn(&device->dev, - "No client context found for %s\n", - client->name); - continue; - } + client->remove(device, xa_load(&device->client_data, + client->client_id)); down_write(&lists_rwsem); - write_lock_irq(&device->client_data_lock); - list_del(&found_context->list); - write_unlock_irq(&device->client_data_lock); + xa_erase(&device->client_data, client->client_id); up_write(&lists_rwsem); - kfree(found_context); } down_write(&lists_rwsem); @@ -808,59 +829,28 @@ void ib_unregister_client(struct ib_client *client) } EXPORT_SYMBOL(ib_unregister_client); -/** - * ib_get_client_data - Get IB client context - * @device:Device to get context for - * @client:Client to get context for - * - * ib_get_client_data() returns client context set with - * ib_set_client_data(). - */ -void *ib_get_client_data(struct ib_device *device, struct ib_client *client) -{ - struct ib_client_data *context; - void *ret = NULL; - unsigned long flags; - - read_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - ret = context->data; - break; - } - read_unlock_irqrestore(&device->client_data_lock, flags); - - return ret; -} -EXPORT_SYMBOL(ib_get_client_data); - /** * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set * - * ib_set_client_data() sets client context that can be retrieved with - * ib_get_client_data(). + * ib_set_client_data() sets client context data that can be retrieved with + * ib_get_client_data(). This can only be called while the client is + * registered to the device, once the ib_client remove() callback returns this + * cannot be called. */ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data) { - struct ib_client_data *context; - unsigned long flags; - - write_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - context->data = data; - goto out; - } + void *rc; - dev_warn(&device->dev, "No client context found for %s\n", - client->name); + if (WARN_ON(IS_ERR(data))) + data = NULL; -out: - write_unlock_irqrestore(&device->client_data_lock, flags); + rc = xa_store(&device->client_data, client->client_id, data, + GFP_KERNEL); + WARN_ON(xa_is_err(rc)); } EXPORT_SYMBOL(ib_set_client_data); @@ -1018,9 +1008,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, void *cookie) { struct ib_device *dev; + unsigned long index; down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); up_read(&lists_rwsem); } @@ -1034,12 +1025,13 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, struct netlink_callback *cb) { + unsigned long index; struct ib_device *dev; unsigned int idx = 0; int ret = 0; down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) { + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { ret = nldev_cb(dev, skb, cb, idx); if (ret) break; @@ -1212,26 +1204,25 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, const struct sockaddr *addr) { struct net_device *net_dev = NULL; - struct ib_client_data *context; + unsigned long index; + void *client_data; if (!rdma_protocol_ib(dev, port)) return NULL; down_read(&lists_rwsem); - list_for_each_entry(context, &dev->client_data_list, list) { - struct ib_client *client = context->client; + xan_for_each_marked (&dev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); - if (context->going_down) + if (!client || !client->get_net_dev_by_params) continue; - if (client->get_net_dev_by_params) { - net_dev = client->get_net_dev_by_params(dev, port, pkey, - gid, addr, - context->data); - if (net_dev) - break; - } + net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, + addr, client_data); + if (net_dev) + break; } up_read(&lists_rwsem); @@ -1462,6 +1453,7 @@ static void __exit ib_core_cleanup(void) /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); WARN_ON(!xa_empty(&clients)); + WARN_ON(!xa_empty(&devices)); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cc15820513cd..8558f31ca46f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2542,12 +2542,7 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; - rwlock_t client_data_lock; - struct list_head core_list; - /* Access to the client_data_list is protected by the client_data_lock - * rwlock and the lists_rwsem read-write semaphore - */ - struct list_head client_data_list; + struct xarray client_data; struct ib_cache cache; /** @@ -2660,7 +2655,21 @@ void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); -void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns the client context data set with + * ib_set_client_data(). This can only be called while the client is + * registered to the device, once the ib_client remove() callback returns this + * cannot be called. + */ +static inline void *ib_get_client_data(struct ib_device *device, + struct ib_client *client) +{ + return xa_load(&device->client_data, client->client_id); +} void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); void ib_set_device_ops(struct ib_device *device, -- cgit v1.2.3-55-g7522 From 921eab1143aadf976a42cac4605b4d35159b355d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 Feb 2019 22:41:54 -0700 Subject: RDMA/devices: Re-organize device.c locking The locking here started out with a single lock that covered everything and then has lately veered into crazy town. The fundamental problem is that several places need to iterate over a linked list, but also need to drop their locks to avoid deadlock during client callbacks. xarray's restartable iteration offers a simple solution to the problem. Once all the lists are xarrays we can drop locks in the places that need that and rely on xarray to provide consistency and locking for the data structure. The resulting simplification is that each of the three lists has a dedicated rwsem that must be held when working with the list it covers. One data structure is no longer covered by multiple locks. The sleeping semaphore is selected because the read side generally needs to be held over something sleeping, and using RCU reader locking in those cases is overkill. In the process this simplifies the entire registration/unregistration flow to be the expected list of setups and the reversed list of matching teardowns, and the registration lock 'refcount' can now be revised to be released after the ULPs are removed, providing a very sane semantic for this feature. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 361 ++++++++++++++++++++++++--------------- include/rdma/ib_verbs.h | 1 + 2 files changed, 222 insertions(+), 140 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5096593b99e9..3325be4f91a5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -56,6 +55,29 @@ struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); +/* + * Each of the three rwsem locks (devices, clients, client_data) protects the + * xarray of the same name. Specifically it allows the caller to assert that + * the MARK will/will not be changing under the lock, and for devices and + * clients, that the value in the xarray is still a valid pointer. Change of + * the MARK is linked to the object state, so holding the lock and testing the + * MARK also asserts that the contained object is in a certain state. + * + * This is used to build a two stage register/unregister flow where objects + * can continue to be in the xarray even though they are still in progress to + * register/unregister. + * + * The xarray itself provides additional locking, and restartable iteration, + * which is also relied on. + * + * Locks should not be nested, with the exception of client_data, which is + * allowed to nest under the read side of the other two locks. + * + * The devices_rwsem also protects the device name list, any change or + * assignment of device name must also hold the write side to guarantee unique + * names. + */ + /* * devices contains devices that have had their names assigned. The * devices may not be registered. Users that care about the registration @@ -64,17 +86,13 @@ EXPORT_SYMBOL_GPL(ib_wq); * */ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); - -/* - * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the - * object is guaranteed to be and remain registered for the duration of the - * lock. - */ +static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 static LIST_HEAD(client_list); #define CLIENT_REGISTERED XA_MARK_1 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); +static DECLARE_RWSEM(clients_rwsem); /* * If client_data is registered then the corresponding client must also still @@ -115,20 +133,6 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, !xa_is_err(entry); \ (index)++, entry = xan_find_marked(xa, &(index), filter)) -/* - * device_mutex and lists_rwsem protect access to both devices and - * clients. device_mutex protects writer access by device and client - * registration / de-registration. lists_rwsem protects reader access to - * these lists. Iterators of these lists must lock it for read, while updates - * to the lists must be done with a write lock. A special case is when the - * device_mutex is locked. In this case locking the lists for read access is - * not necessary as the device_mutex implies it. - * - * lists_rwsem also protects access to the client data list. - */ -static DEFINE_MUTEX(device_mutex); -static DECLARE_RWSEM(lists_rwsem); - static int ib_security_change(struct notifier_block *nb, unsigned long event, void *lsm_data); static void ib_policy_change_task(struct work_struct *work); @@ -185,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index) { struct ib_device *device; - down_read(&lists_rwsem); + down_read(&devices_rwsem); device = xa_load(&devices, index); if (device) { if (!ib_device_try_get(device)) device = NULL; } - up_read(&lists_rwsem); + up_read(&devices_rwsem); return device; } @@ -225,7 +229,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) { int ret; - mutex_lock(&device_mutex); + down_write(&devices_rwsem); if (!strcmp(name, dev_name(&ibdev->dev))) { ret = 0; goto out; @@ -241,7 +245,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) goto out; strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); out: - mutex_unlock(&device_mutex); + up_write(&devices_rwsem); return ret; } @@ -253,6 +257,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name) int rc; int i; + lockdep_assert_held_exclusive(&devices_rwsem); ida_init(&inuse); xa_for_each (&devices, index, device) { char buf[IB_DEVICE_NAME_MAX]; @@ -345,6 +350,7 @@ struct ib_device *_ib_alloc_device(size_t size) * destroyed if the user stores NULL in the client data. */ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); + init_rwsem(&device->client_data_rwsem); INIT_LIST_HEAD(&device->port_list); init_completion(&device->unreg_completion); @@ -367,22 +373,86 @@ void ib_dealloc_device(struct ib_device *device) } EXPORT_SYMBOL(ib_dealloc_device); -static int add_client_context(struct ib_device *device, struct ib_client *client) +/* + * add_client_context() and remove_client_context() must be safe against + * parallel calls on the same device - registration/unregistration of both the + * device and client can be occurring in parallel. + * + * The routines need to be a fence, any caller must not return until the add + * or remove is fully completed. + */ +static int add_client_context(struct ib_device *device, + struct ib_client *client) { - void *entry; + int ret = 0; if (!device->kverbs_provider && !client->no_kverbs_req) - return -EOPNOTSUPP; + return 0; + + down_write(&device->client_data_rwsem); + /* + * Another caller to add_client_context got here first and has already + * completely initialized context. + */ + if (xa_get_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED)) + goto out; + + ret = xa_err(xa_store(&device->client_data, client->client_id, NULL, + GFP_KERNEL)); + if (ret) + goto out; + downgrade_write(&device->client_data_rwsem); + if (client->add) + client->add(device); + + /* Readers shall not see a client until add has been completed */ + xa_set_mark(&device->client_data, client->client_id, + CLIENT_DATA_REGISTERED); + up_read(&device->client_data_rwsem); + return 0; + +out: + up_write(&device->client_data_rwsem); + return ret; +} + +static void remove_client_context(struct ib_device *device, + unsigned int client_id) +{ + struct ib_client *client; + void *client_data; - down_write(&lists_rwsem); - entry = xa_store(&device->client_data, client->client_id, NULL, - GFP_KERNEL); - if (!xa_is_err(entry)) - xa_set_mark(&device->client_data, client->client_id, - CLIENT_DATA_REGISTERED); - up_write(&lists_rwsem); + down_write(&device->client_data_rwsem); + if (!xa_get_mark(&device->client_data, client_id, + CLIENT_DATA_REGISTERED)) { + up_write(&device->client_data_rwsem); + return; + } + client_data = xa_load(&device->client_data, client_id); + xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); + client = xa_load(&clients, client_id); + downgrade_write(&device->client_data_rwsem); - return xa_err(entry); + /* + * Notice we cannot be holding any exclusive locks when calling the + * remove callback as the remove callback can recurse back into any + * public functions in this module and thus try for any locks those + * functions take. + * + * For this reason clients and drivers should not call the + * unregistration functions will holdling any locks. + * + * It tempting to drop the client_data_rwsem too, but this is required + * to ensure that unregister_client does not return until all clients + * are completely unregistered, which is required to avoid module + * unloading races. + */ + if (client->remove) + client->remove(device, client_data); + + xa_erase(&device->client_data, client_id); + up_read(&device->client_data_rwsem); } static int verify_immutable(const struct ib_device *dev, u8 port) @@ -461,7 +531,7 @@ static void ib_policy_change_task(struct work_struct *work) struct ib_device *dev; unsigned long index; - down_read(&lists_rwsem); + down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { int i; @@ -478,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work) ib_security_cache_change(dev, i, sp); } } - up_read(&lists_rwsem); + up_read(&devices_rwsem); } static int ib_security_change(struct notifier_block *nb, unsigned long event, @@ -501,6 +571,7 @@ static int assign_name(struct ib_device *device, const char *name) static u32 last_id; int ret; + down_write(&devices_rwsem); /* Assign a unique name to the device */ if (strchr(name, '%')) ret = alloc_name(device, name); @@ -528,13 +599,17 @@ static int assign_name(struct ib_device *device, const char *name) last_id = device->index + 1; ret = 0; + out: + up_write(&devices_rwsem); return ret; } static void release_name(struct ib_device *device) { + down_write(&devices_rwsem); xa_erase(&devices, device->index); + up_write(&devices_rwsem); } static void setup_dma_device(struct ib_device *device) @@ -572,11 +647,18 @@ static void setup_dma_device(struct ib_device *device) } } +/* + * setup_device() allocates memory and sets up data that requires calling the + * device ops, this is the only reason these actions are not done during + * ib_alloc_device. It is undone by ib_dealloc_device(). + */ static int setup_device(struct ib_device *device) { struct ib_udata uhw = {.outlen = 0, .inlen = 0}; int ret; + setup_dma_device(device); + ret = ib_device_check_mandatory(device); if (ret) return ret; @@ -605,6 +687,54 @@ static int setup_device(struct ib_device *device) return 0; } +static void disable_device(struct ib_device *device) +{ + struct ib_client *client; + + WARN_ON(!refcount_read(&device->refcount)); + + down_write(&devices_rwsem); + xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); + up_write(&devices_rwsem); + + down_read(&clients_rwsem); + list_for_each_entry_reverse(client, &client_list, list) + remove_client_context(device, client->client_id); + up_read(&clients_rwsem); + + /* Pairs with refcount_set in enable_device */ + ib_device_put(device); + wait_for_completion(&device->unreg_completion); +} + +/* + * An enabled device is visible to all clients and to all the public facing + * APIs that return a device pointer. + */ +static int enable_device(struct ib_device *device) +{ + struct ib_client *client; + unsigned long index; + int ret; + + refcount_set(&device->refcount, 1); + down_write(&devices_rwsem); + xa_set_mark(&devices, device->index, DEVICE_REGISTERED); + up_write(&devices_rwsem); + + down_read(&clients_rwsem); + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { + ret = add_client_context(device, client); + if (ret) { + up_read(&clients_rwsem); + disable_device(device); + return ret; + } + } + up_read(&clients_rwsem); + return 0; +} + /** * ib_register_device - Register an IB device with IB core * @device:Device to register @@ -617,26 +747,20 @@ static int setup_device(struct ib_device *device) int ib_register_device(struct ib_device *device, const char *name) { int ret; - struct ib_client *client; - unsigned long index; - - setup_dma_device(device); - - mutex_lock(&device_mutex); ret = assign_name(device, name); if (ret) - goto out; + return ret; ret = setup_device(device); if (ret) - goto out_name; + goto out; ret = ib_cache_setup_one(device); if (ret) { dev_warn(&device->dev, "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out_name; + goto out; } ib_device_register_rdmacg(device); @@ -648,25 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name) goto cg_cleanup; } - refcount_set(&device->refcount, 1); - - xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) - if (!add_client_context(device, client) && client->add) - client->add(device); + ret = enable_device(device); + if (ret) + goto sysfs_cleanup; - down_write(&lists_rwsem); - xa_set_mark(&devices, device->index, DEVICE_REGISTERED); - up_write(&lists_rwsem); - mutex_unlock(&device_mutex); return 0; +sysfs_cleanup: + ib_device_unregister_sysfs(device); cg_cleanup: ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); -out_name: - release_name(device); out: - mutex_unlock(&device_mutex); + release_name(device); return ret; } EXPORT_SYMBOL(ib_register_device); @@ -679,45 +797,11 @@ EXPORT_SYMBOL(ib_register_device); */ void ib_unregister_device(struct ib_device *device) { - struct ib_client *client; - unsigned long index; - - /* - * Wait for all netlink command callers to finish working on the - * device. - */ - ib_device_put(device); - wait_for_completion(&device->unreg_completion); - - mutex_lock(&device_mutex); - - down_write(&lists_rwsem); - xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); - xa_for_each (&clients, index, client) - xa_clear_mark(&device->client_data, index, - CLIENT_DATA_REGISTERED); - downgrade_write(&lists_rwsem); - - list_for_each_entry_reverse(client, &client_list, list) - if (xa_get_mark(&device->client_data, client->client_id, - CLIENT_DATA_REGISTERED) && - client->remove) - client->remove(device, xa_load(&device->client_data, - client->client_id)); - up_read(&lists_rwsem); - + disable_device(device); ib_device_unregister_sysfs(device); ib_device_unregister_rdmacg(device); - - release_name(device); - - mutex_unlock(&device_mutex); - ib_cache_cleanup_one(device); - - down_write(&lists_rwsem); - xa_destroy(&device->client_data); - up_write(&lists_rwsem); + release_name(device); } EXPORT_SYMBOL(ib_unregister_device); @@ -725,6 +809,7 @@ static int assign_client_id(struct ib_client *client) { int ret; + down_write(&clients_rwsem); /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in @@ -743,7 +828,11 @@ static int assign_client_id(struct ib_client *client) if (ret) goto out; + xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); + list_add_tail(&client->list, &client_list); + out: + up_write(&clients_rwsem); return ret; } @@ -766,23 +855,20 @@ int ib_register_client(struct ib_client *client) unsigned long index; int ret; - mutex_lock(&device_mutex); ret = assign_client_id(client); - if (ret) { - mutex_unlock(&device_mutex); + if (ret) return ret; - } - - xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) - if (!add_client_context(device, client) && client->add) - client->add(device); - - down_write(&lists_rwsem); - xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - up_write(&lists_rwsem); - - mutex_unlock(&device_mutex); + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { + ret = add_client_context(device, client); + if (ret) { + up_read(&devices_rwsem); + ib_unregister_client(client); + return ret; + } + } + up_read(&devices_rwsem); return 0; } EXPORT_SYMBOL(ib_register_client); @@ -794,38 +880,31 @@ EXPORT_SYMBOL(ib_register_client); * Upper level users use ib_unregister_client() to remove their client * registration. When ib_unregister_client() is called, the client * will receive a remove callback for each IB device still registered. + * + * This is a full fence, once it returns no client callbacks will be called, + * or are running in another thread. */ void ib_unregister_client(struct ib_client *client) { struct ib_device *device; unsigned long index; - mutex_lock(&device_mutex); - - down_write(&lists_rwsem); + down_write(&clients_rwsem); xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); - up_write(&lists_rwsem); - - xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { - down_write(&lists_rwsem); - xa_clear_mark(&device->client_data, client->client_id, - CLIENT_DATA_REGISTERED); - up_write(&lists_rwsem); - - if (client->remove) - client->remove(device, xa_load(&device->client_data, - client->client_id)); - - down_write(&lists_rwsem); - xa_erase(&device->client_data, client->client_id); - up_write(&lists_rwsem); - } + up_write(&clients_rwsem); + /* + * Every device still known must be serialized to make sure we are + * done with the client callbacks before we return. + */ + down_read(&devices_rwsem); + xa_for_each (&devices, index, device) + remove_client_context(device, client->client_id); + up_read(&devices_rwsem); - down_write(&lists_rwsem); + down_write(&clients_rwsem); list_del(&client->list); xa_erase(&clients, client->client_id); - up_write(&lists_rwsem); - mutex_unlock(&device_mutex); + up_write(&clients_rwsem); } EXPORT_SYMBOL(ib_unregister_client); @@ -1010,10 +1089,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, struct ib_device *dev; unsigned long index; - down_read(&lists_rwsem); + down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); - up_read(&lists_rwsem); + up_read(&devices_rwsem); } /** @@ -1030,15 +1109,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, unsigned int idx = 0; int ret = 0; - down_read(&lists_rwsem); + down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { ret = nldev_cb(dev, skb, cb, idx); if (ret) break; idx++; } - - up_read(&lists_rwsem); + up_read(&devices_rwsem); return ret; } @@ -1196,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey); * @gid: A GID that the net_dev uses to communicate. * @addr: Contains the IP address that the request specified as its * destination. + * */ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, @@ -1210,8 +1289,11 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, if (!rdma_protocol_ib(dev, port)) return NULL; - down_read(&lists_rwsem); - + /* + * Holding the read side guarantees that the client will not become + * unregistered while we are calling get_net_dev_by_params() + */ + down_read(&dev->client_data_rwsem); xan_for_each_marked (&dev->client_data, index, client_data, CLIENT_DATA_REGISTERED) { struct ib_client *client = xa_load(&clients, index); @@ -1224,8 +1306,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, if (net_dev) break; } - - up_read(&lists_rwsem); + up_read(&dev->client_data_rwsem); return net_dev; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8558f31ca46f..135fab2c016c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2542,6 +2542,7 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; + struct rw_semaphore client_data_rwsem; struct xarray client_data; struct ib_cache cache; -- cgit v1.2.3-55-g7522 From e155755e53804e721c8ce99474cc9c65eb8e8bc2 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Feb 2019 19:23:03 +0200 Subject: RDMA/core: Use simpler device_del() instead of device_unregister() Instead of holding extra reference using get_device() that device_unregister() releases, simplify it as below. device_add() balances with device_del(). device_initialize() balances with put_device(), always via ib_dealloc_device(). Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/sysfs.c | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3325be4f91a5..46d237ce83de 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -369,6 +369,7 @@ void ib_dealloc_device(struct ib_device *device) WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); + /* Balances with device_initialize */ put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c75692802da8..e04f111fe406 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1351,15 +1351,12 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - /* Hold device until ib_dealloc_device() */ - get_device(&device->dev); - free_port_list_attributes(device); if (device->hw_stats) { kfree(device->hw_stats); free_hsag(&device->dev.kobj, device->hw_stats_ag); } - - device_unregister(&device->dev); + /* Balances with device_add */ + device_del(&device->dev); } -- cgit v1.2.3-55-g7522 From 5f8f5499005c51656645a011bca81ffb66fcaaca Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Feb 2019 19:23:06 +0200 Subject: RDMA/core: Move device addition deletion to device.c Move core device addition and removal from sysfs.c to device.c as device.c is more appropriate place for device management. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 ++ drivers/infiniband/core/device.c | 11 ++++++++++- drivers/infiniband/core/sysfs.c | 14 ++------------ 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1826f4c2e23..eeabe9ca8427 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -54,6 +54,8 @@ struct pkey_index_qp_list { struct list_head qp_list; }; +extern const struct attribute_group ib_dev_attr_group; + int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); int ib_device_rename(struct ib_device *ibdev, const char *name); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 46d237ce83de..3eddc6e67a16 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -341,6 +341,8 @@ struct ib_device *_ib_alloc_device(size_t size) rdma_restrack_init(device); device->dev.class = &ib_class; + device->groups[0] = &ib_dev_attr_group; + device->dev.groups = device->groups; device_initialize(&device->dev); INIT_LIST_HEAD(&device->event_handler_list); @@ -766,11 +768,15 @@ int ib_register_device(struct ib_device *device, const char *name) ib_device_register_rdmacg(device); + ret = device_add(&device->dev); + if (ret) + goto cg_cleanup; + ret = ib_device_register_sysfs(device); if (ret) { dev_warn(&device->dev, "Couldn't register device with driver model\n"); - goto cg_cleanup; + goto dev_cleanup; } ret = enable_device(device); @@ -781,6 +787,8 @@ int ib_register_device(struct ib_device *device, const char *name) sysfs_cleanup: ib_device_unregister_sysfs(device); +dev_cleanup: + device_del(&device->dev); cg_cleanup: ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); @@ -800,6 +808,7 @@ void ib_unregister_device(struct ib_device *device) { disable_device(device); ib_device_unregister_sysfs(device); + device_del(&device->dev); ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); release_name(device); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7e51b406e89a..9335b15c2e38 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1275,7 +1275,7 @@ static struct attribute *ib_dev_attrs[] = { NULL, }; -static const struct attribute_group dev_attr_group = { +const struct attribute_group ib_dev_attr_group = { .attrs = ib_dev_attrs, }; @@ -1338,18 +1338,10 @@ int ib_device_register_sysfs(struct ib_device *device) { int ret; - device->groups[0] = &dev_attr_group; - device->dev.groups = device->groups; - - ret = device_add(&device->dev); + ret = ib_setup_port_attrs(device); if (ret) return ret; - ret = ib_setup_port_attrs(device); - if (ret) { - device_del(&device->dev); - return ret; - } if (device->ops.alloc_hw_stats) setup_hw_stats(device, NULL, 0); @@ -1363,6 +1355,4 @@ void ib_device_unregister_sysfs(struct ib_device *device) kfree(device->hw_stats); ib_free_port_attrs(device); - /* Balances with device_add */ - device_del(&device->dev); } -- cgit v1.2.3-55-g7522 From 41eda65c6100930d95bb854a0114f3544593070c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 18 Feb 2019 22:25:47 +0200 Subject: RDMA/restrack: Hide restrack DB from IB/core There is no need to expose internals of restrack DB to IB/core. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++- drivers/infiniband/core/nldev.c | 17 ++++---- drivers/infiniband/core/restrack.c | 83 ++++++++++++++++++++++++++------------ drivers/infiniband/core/restrack.h | 39 ++++++++++++++++++ include/rdma/ib_verbs.h | 7 ++-- include/rdma/restrack.h | 28 ------------- 6 files changed, 114 insertions(+), 66 deletions(-) create mode 100644 drivers/infiniband/core/restrack.h (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3eddc6e67a16..f7e206033d39 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -45,6 +45,7 @@ #include #include "core_priv.h" +#include "restrack.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); @@ -338,7 +339,10 @@ struct ib_device *_ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(device); + if (rdma_restrack_init(device)) { + kfree(device); + return NULL; + } device->dev.class = &ib_class; device->groups[0] = &ib_dev_attr_group; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 0cd95f80f7b4..54312f9626a1 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -39,6 +39,7 @@ #include "core_priv.h" #include "cma_priv.h" +#include "restrack.h" static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, @@ -1027,6 +1028,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, unsigned long id; u32 index, port = 0; bool filled = false; + struct xarray *xa; err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); @@ -1074,13 +1076,14 @@ static int res_get_common_dumpit(struct sk_buff *skb, has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); - down_read(&device->res.rwsem); + xa = &device->res->xa[res_type]; + down_read(&device->res->rwsem); /* * FIXME: if the skip ahead is something common this loop should * use xas_for_each & xas_pause to optimize, we can have a lot of * objects. */ - xa_for_each(&device->res.xa[res_type], id, res) { + xa_for_each(xa, id, res) { if (idx < start) goto next; @@ -1101,13 +1104,13 @@ static int res_get_common_dumpit(struct sk_buff *skb, if (!entry_attr) { ret = -EMSGSIZE; rdma_restrack_put(res); - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); break; } - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); - down_read(&device->res.rwsem); + down_read(&device->res->rwsem); /* * Return resource back, but it won't be released till * the &device->res.rwsem will be released for write. @@ -1125,7 +1128,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, nla_nest_end(skb, entry_attr); next: idx++; } - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); nla_nest_end(skb, table_attr); nlmsg_end(skb, nlh); @@ -1143,7 +1146,7 @@ next: idx++; res_err: nla_nest_cancel(skb, table_attr); - up_read(&device->res.rwsem); + up_read(&device->res->rwsem); err: nlmsg_cancel(skb, nlh); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 076ef6475df8..6a4b76c66bcb 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -9,8 +9,10 @@ #include #include #include +#include #include "cma_priv.h" +#include "restrack.h" static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, u32 *next) @@ -35,18 +37,27 @@ static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, } /** - * rdma_restrack_init() - initialize resource tracking + * rdma_restrack_init() - initialize and allocate resource tracking * @dev: IB device + * + * Return: 0 on success */ -void rdma_restrack_init(struct ib_device *dev) +int rdma_restrack_init(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root *rt; int i; + dev->res = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!dev->res) + return -ENOMEM; + + rt = dev->res; + for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) - xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC); + xa_init_flags(&rt->xa[i], XA_FLAGS_ALLOC); + init_rwsem(&rt->rwsem); - init_rwsem(&res->rwsem); + return 0; } static const char *type2str(enum rdma_restrack_type type) @@ -69,7 +80,7 @@ static const char *type2str(enum rdma_restrack_type type) */ void rdma_restrack_clean(struct ib_device *dev) { - struct rdma_restrack_root *res = &dev->res; + struct rdma_restrack_root *rt = dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; bool found = false; @@ -77,14 +88,16 @@ void rdma_restrack_clean(struct ib_device *dev) int i; for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) { - if (!xa_empty(&res->xa[i])) { + struct xarray *xa = &dev->res->xa[i]; + + if (!xa_empty(xa)) { unsigned long index; if (!found) { pr_err("restrack: %s", CUT_HERE); dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); } - xa_for_each(&res->xa[i], index, e) { + xa_for_each(xa, index, e) { if (rdma_is_kernel_res(e)) { owner = e->kern_name; } else { @@ -104,10 +117,12 @@ void rdma_restrack_clean(struct ib_device *dev) } found = true; } - xa_destroy(&res->xa[i]); + xa_destroy(xa); } if (found) pr_err("restrack: %s", CUT_HERE); + + kfree(rt); } /** @@ -119,19 +134,19 @@ void rdma_restrack_clean(struct ib_device *dev) int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { - struct rdma_restrack_root *res = &dev->res; + struct xarray *xa = &dev->res->xa[type]; struct rdma_restrack_entry *e; unsigned long index = 0; u32 cnt = 0; - down_read(&res->rwsem); - xa_for_each(&res->xa[type], index, e) { + down_read(&dev->res->rwsem); + xa_for_each(xa, index, e) { if (ns == &init_pid_ns || (!rdma_is_kernel_res(e) && ns == task_active_pid_ns(e->task))) cnt++; } - up_read(&res->rwsem); + up_read(&dev->res->rwsem); return cnt; } EXPORT_SYMBOL(rdma_restrack_count); @@ -202,17 +217,19 @@ EXPORT_SYMBOL(rdma_restrack_set_task); static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); + struct rdma_restrack_root *rt; + struct xarray *xa; int ret; if (!dev) return; + rt = dev->res; + xa = &dev->res->xa[res->type]; + kref_init(&res->kref); init_completion(&res->comp); - - ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res, - &dev->res.next_id[res->type]); - + ret = rt_xa_alloc_cyclic(xa, &res->id, res, &rt->next_id[res->type]); if (!ret) res->valid = true; } @@ -266,14 +283,14 @@ struct rdma_restrack_entry * rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id) { - struct rdma_restrack_root *rt = &dev->res; + struct xarray *xa = &dev->res->xa[type]; struct rdma_restrack_entry *res; - down_read(&dev->res.rwsem); - res = xa_load(&rt->xa[type], id); + down_read(&dev->res->rwsem); + res = xa_load(xa, id); if (!res || !rdma_restrack_get(res)) res = ERR_PTR(-ENOENT); - up_read(&dev->res.rwsem); + up_read(&dev->res->rwsem); return res; } @@ -295,19 +312,33 @@ EXPORT_SYMBOL(rdma_restrack_put); void rdma_restrack_del(struct rdma_restrack_entry *res) { - struct ib_device *dev; + struct ib_device *dev = res_to_dev(res); + struct xarray *xa; if (!res->valid) goto out; - dev = res_to_dev(res); + /* + * All objects except CM_ID set valid device immediately + * after new object is created, it means that for not valid + * objects will still have "dev". + * + * It is not the case for CM_ID, newly created object has + * this field set to NULL and it is set in _cma_attach_to_dev() + * only. + * + * Because we don't want to add any conditions on call + * to rdma_restrack_del(), the check below protects from + * NULL-dereference. + */ if (!dev) return; - down_write(&dev->res.rwsem); - xa_erase(&dev->res.xa[res->type], res->id); + xa = &dev->res->xa[res->type]; + down_write(&dev->res->rwsem); + xa_erase(xa, res->id); res->valid = false; - up_write(&dev->res.rwsem); + up_write(&dev->res->rwsem); rdma_restrack_put(res); wait_for_completion(&res->comp); diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h new file mode 100644 index 000000000000..cf89ef0b8ed5 --- /dev/null +++ b/drivers/infiniband/core/restrack.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_CORE_RESTRACK_H_ +#define _RDMA_CORE_RESTRACK_H_ + +#include +#include + +/** + * struct rdma_restrack_root - main resource tracking management + * entity, per-device + */ +struct rdma_restrack_root { + /* + * @rwsem: Read/write lock to protect erase of entry. + * Lists and insertions are protected by XArray internal lock. + */ + struct rw_semaphore rwsem; + /** + * @xa: Array of XArray structures to hold restrack entries. + * We want to use array of XArrays because insertion is type + * dependent. For types with xisiting unique ID (like QPN), + * we will insert to that unique index. For other types, + * we insert based on pointers and auto-allocate unique index. + */ + struct xarray xa[RDMA_RESTRACK_MAX]; + /** + * @next_id: Next ID to support cyclic allocation + */ + u32 next_id[RDMA_RESTRACK_MAX]; +}; + + +int rdma_restrack_init(struct ib_device *dev); +void rdma_restrack_clean(struct ib_device *dev); +#endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 64ee7c08be22..2a17c2b30073 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2533,6 +2533,8 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_pd); }; +struct rdma_restrack_root; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2589,10 +2591,7 @@ struct ib_device { #endif u32 index; - /* - * Implementation details of the RDMA core, don't use in drivers - */ - struct rdma_restrack_root res; + struct rdma_restrack_root *res; const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 53e1a7fb7355..ecf3c7702a4f 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -7,7 +7,6 @@ #define _RDMA_RESTRACK_H_ #include -#include #include #include #include @@ -50,31 +49,6 @@ enum rdma_restrack_type { }; struct ib_device; -struct rdma_restrack_entry; - -/** - * struct rdma_restrack_root - main resource tracking management - * entity, per-device - */ -struct rdma_restrack_root { - /* - * @rwsem: Read/write lock to protect erase of entry. - * Lists and insertions are protected by XArray internal lock. - */ - struct rw_semaphore rwsem; - /** - * @xa: Array of XArray structures to hold restrack entries. - * We want to use array of XArrays because insertion is type - * dependent. For types with xisiting unique ID (like QPN), - * we will insert to that unique index. For other types, - * we insert based on pointers and auto-allocate unique index. - */ - struct xarray xa[RDMA_RESTRACK_MAX]; - /** - * @next_id: Next ID to support cyclic allocation - */ - u32 next_id[RDMA_RESTRACK_MAX]; -}; /** * struct rdma_restrack_entry - metadata per-entry @@ -125,8 +99,6 @@ struct rdma_restrack_entry { u32 id; }; -void rdma_restrack_init(struct ib_device *dev); -void rdma_restrack_clean(struct ib_device *dev); int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns); -- cgit v1.2.3-55-g7522 From ea1075edcbab7d92f4e4ccf5490043f796bf78be Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:47 -0700 Subject: RDMA: Add and use rdma_for_each_port We have many loops iterating over all of the end port numbers on a struct ib_device, simplify them with a for_each helper. Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- .clang-format | 1 + drivers/infiniband/core/cache.c | 6 +++--- drivers/infiniband/core/cma.c | 7 +++---- drivers/infiniband/core/device.c | 26 ++++++++++++-------------- drivers/infiniband/core/mad.c | 4 ++-- drivers/infiniband/core/nldev.c | 4 ++-- drivers/infiniband/core/security.c | 11 +++++++---- drivers/infiniband/core/sysfs.c | 12 +++--------- drivers/infiniband/core/user_mad.c | 9 +++++---- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 5 +++-- include/rdma/ib_verbs.h | 10 ++++++++++ 12 files changed, 53 insertions(+), 46 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/.clang-format b/.clang-format index 335ce29ab813..201a4f531b90 100644 --- a/.clang-format +++ b/.clang-format @@ -361,6 +361,7 @@ ForEachMacros: - 'radix_tree_for_each_slot' - 'radix_tree_for_each_tagged' - 'rbtree_postorder_for_each_entry_safe' + - 'rdma_for_each_port' - 'resource_list_for_each_entry' - 'resource_list_for_each_entry_safe' - 'rhl_for_each_entry_rcu' diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 2338d0b3a0ca..3d137d8381a9 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1428,7 +1428,7 @@ static void ib_cache_event(struct ib_event_handler *handler, int ib_cache_setup_one(struct ib_device *device) { - int p; + unsigned int p; int err; rwlock_init(&device->cache.lock); @@ -1447,8 +1447,8 @@ int ib_cache_setup_one(struct ib_device *device) return err; } - for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) - ib_cache_update(device, p + rdma_start_port(device), true); + rdma_for_each_port (device, p) + ib_cache_update(device, p, true); INIT_IB_EVENT_HANDLER(&device->cache.event_handler, device, ib_cache_event); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c43512752b8a..68c997be2429 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -659,7 +659,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv) struct cma_device *cma_dev; enum ib_gid_type gid_type; int ret = -ENODEV; - u8 port; + unsigned int port; if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) @@ -673,8 +673,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv) mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { - for (port = rdma_start_port(cma_dev->device); - port <= rdma_end_port(cma_dev->device); port++) { + rdma_for_each_port (cma_dev->device, port) { gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; gid_type = cma_dev->default_gid_type[port - 1]; @@ -4548,7 +4547,7 @@ static void cma_add_one(struct ib_device *device) if (!cma_dev->default_roce_tos) goto free_gid_type; - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + rdma_for_each_port (device, i) { supported_gids = roce_gid_type_mask_support(device, i); WARN_ON(!supported_gids); if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f7e206033d39..71582f848a9c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -470,10 +470,8 @@ static int verify_immutable(const struct ib_device *dev, u8 port) static int read_port_immutable(struct ib_device *device) { + unsigned int port; int ret; - u8 start_port = rdma_start_port(device); - u8 end_port = rdma_end_port(device); - u8 port; /** * device->port_immutable is indexed directly by the port number to make @@ -482,13 +480,13 @@ static int read_port_immutable(struct ib_device *device) * Therefore port_immutable is declared as a 1 based array with * potential empty slots at the beginning. */ - device->port_immutable = kcalloc(end_port + 1, - sizeof(*device->port_immutable), - GFP_KERNEL); + device->port_immutable = + kcalloc(rdma_end_port(device) + 1, + sizeof(*device->port_immutable), GFP_KERNEL); if (!device->port_immutable) return -ENOMEM; - for (port = start_port; port <= end_port; ++port) { + rdma_for_each_port (device, port) { ret = device->ops.get_port_immutable( device, port, &device->port_immutable[port]); if (ret) @@ -540,9 +538,9 @@ static void ib_policy_change_task(struct work_struct *work) down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { - int i; + unsigned int i; - for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) { + rdma_for_each_port (dev, i) { u64 sp; int ret = ib_get_cached_subnet_prefix(dev, i, @@ -1060,10 +1058,9 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_callback cb, void *cookie) { - u8 port; + unsigned int port; - for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); - port++) + rdma_for_each_port (ib_dev, port) if (rdma_protocol_roce(ib_dev, port)) { struct net_device *idev = NULL; @@ -1217,9 +1214,10 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, u8 *port_num, u16 *index) { union ib_gid tmp_gid; - int ret, port, i; + unsigned int port; + int ret, i; - for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { + rdma_for_each_port (device, port) { if (!rdma_protocol_ib(device, port)) continue; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7870823bac47..e742a6a2c138 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3326,9 +3326,9 @@ error: static void ib_mad_remove_device(struct ib_device *device, void *client_data) { - int i; + unsigned int i; - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + rdma_for_each_port (device, i) { if (!rdma_cap_ib_mad(device, i)) continue; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e7350d9d60e9..85f6f2bcce40 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -774,7 +774,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, u32 idx = 0; u32 ifindex; int err; - u32 p; + unsigned int p; err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); @@ -786,7 +786,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, if (!device) return -EINVAL; - for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + rdma_for_each_port (device, p) { /* * The dumpit function returns all information from specific * index. This specific index is taken from the netlink diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index dad6a94a43f3..492702b83600 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -422,12 +422,15 @@ void ib_close_shared_qp_security(struct ib_qp_security *sec) int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) { - u8 i = rdma_start_port(dev); + unsigned int i; bool is_ib = false; int ret; - while (i <= rdma_end_port(dev) && !is_ib) + rdma_for_each_port (dev, i) { is_ib = rdma_protocol_ib(dev, i++); + if (is_ib) + break; + } /* If this isn't an IB device don't create the security context */ if (!is_ib) @@ -561,9 +564,9 @@ void ib_security_cache_change(struct ib_device *device, void ib_security_release_port_pkey_list(struct ib_device *device) { struct pkey_index_qp_list *pkey, *tmp_pkey; - int i; + unsigned int i; - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + rdma_for_each_port (device, i) { list_for_each_entry_safe(pkey, tmp_pkey, &device->port_pkey_list[i].pkey_list, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9335b15c2e38..9b6a065bdfa5 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1308,23 +1308,17 @@ static void ib_free_port_attrs(struct ib_device *device) static int ib_setup_port_attrs(struct ib_device *device) { + unsigned int port; int ret; - int i; device->ports_kobj = kobject_create_and_add("ports", &device->dev.kobj); if (!device->ports_kobj) return -ENOMEM; - if (rdma_cap_ib_switch(device)) { - ret = add_port(device, 0); + rdma_for_each_port (device, port) { + ret = add_port(device, port); if (ret) goto err_put; - } else { - for (i = 1; i <= device->phys_port_cnt; ++i) { - ret = add_port(device, i); - if (ret) - goto err_put; - } } return 0; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 3ebd211a87ed..02b7947ab215 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1323,14 +1323,15 @@ free: static void ib_umad_remove_one(struct ib_device *device, void *client_data) { struct ib_umad_device *umad_dev = client_data; - int i; + unsigned int i; if (!umad_dev) return; - for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { - if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) - ib_umad_kill_port(&umad_dev->ports[i]); + rdma_for_each_port (device, i) { + if (rdma_cap_ib_mad(device, i)) + ib_umad_kill_port( + &umad_dev->ports[i - rdma_start_port(device)]); } /* balances kref_init() */ ib_umad_dev_put(umad_dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ee4cca80f00b..48eda16db1a7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2495,7 +2495,7 @@ static void ipoib_add_one(struct ib_device *device) struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int p; + unsigned int p; int count = 0; dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); @@ -2504,7 +2504,7 @@ static void ipoib_add_one(struct ib_device *device) INIT_LIST_HEAD(dev_list); - for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + rdma_for_each_port (device, p) { if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 84184910f038..151f4eba84b8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -4127,7 +4127,8 @@ static void srp_add_one(struct ib_device *device) struct srp_device *srp_dev; struct ib_device_attr *attr = &device->attrs; struct srp_host *host; - int mr_page_shift, p; + int mr_page_shift; + unsigned int p; u64 max_pages_per_mr; unsigned int flags = 0; @@ -4194,7 +4195,7 @@ static void srp_add_one(struct ib_device *device) WARN_ON_ONCE(srp_dev->global_rkey == 0); } - for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + rdma_for_each_port (device, p) { host = srp_add_port(srp_dev, p); if (host) list_add_tail(&host->list, &srp_dev->dev_list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2a17c2b30073..fa0edd6ae33c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2827,6 +2827,16 @@ static inline u8 rdma_start_port(const struct ib_device *device) return rdma_cap_ib_switch(device) ? 0 : 1; } +/** + * rdma_for_each_port - Iterate over all valid port numbers of the IB device + * @device - The struct ib_device * to iterate over + * @iter - The unsigned int to store the port number + */ +#define rdma_for_each_port(device, iter) \ + for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \ + unsigned int, iter))); \ + iter <= rdma_end_port(device); (iter)++) + /** * rdma_end_port - Return the last valid port number for the device * specified -- cgit v1.2.3-55-g7522 From 8ceb1357b33790193e9d55d2d09bcfd6bd59dd6d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:48 -0700 Subject: RDMA/device: Consolidate ib_device per_port data into one place There is no reason to have three allocations of per-port data. Combine them together and make the lifetime for all the per-port data match the struct ib_device. Following patches will require more port-specific data, now there is a good place to put it. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 4 +-- drivers/infiniband/core/device.c | 70 ++++++++++++------------------------ drivers/infiniband/core/security.c | 24 ++++++------- include/rdma/ib_verbs.h | 74 ++++++++++++++++++++++---------------- 4 files changed, 78 insertions(+), 94 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3d137d8381a9..9d0e8aca741a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -881,8 +881,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev) for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); - table = alloc_gid_table( - ib_dev->port_immutable[rdma_port].gid_tbl_len); + table = alloc_gid_table( + ib_dev->port_data[rdma_port].immutable.gid_tbl_len); if (!table) goto rollback_table_setup; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 71582f848a9c..8d7d63a60ef5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -293,8 +293,7 @@ static void ib_device_release(struct device *device) WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); - kfree(dev->port_pkey_list); - kfree(dev->port_immutable); + kfree(dev->port_data); xa_destroy(&dev->client_data); kfree(dev); } @@ -468,27 +467,31 @@ static int verify_immutable(const struct ib_device *dev, u8 port) rdma_max_mad_size(dev, port) != 0); } -static int read_port_immutable(struct ib_device *device) +static int setup_port_data(struct ib_device *device) { unsigned int port; int ret; - /** - * device->port_immutable is indexed directly by the port number to make + /* + * device->port_data is indexed directly by the port number to make * access to this data as efficient as possible. * - * Therefore port_immutable is declared as a 1 based array with - * potential empty slots at the beginning. + * Therefore port_data is declared as a 1 based array with potential + * empty slots at the beginning. */ - device->port_immutable = - kcalloc(rdma_end_port(device) + 1, - sizeof(*device->port_immutable), GFP_KERNEL); - if (!device->port_immutable) + device->port_data = kcalloc(rdma_end_port(device) + 1, + sizeof(*device->port_data), GFP_KERNEL); + if (!device->port_data) return -ENOMEM; rdma_for_each_port (device, port) { - ret = device->ops.get_port_immutable( - device, port, &device->port_immutable[port]); + struct ib_port_data *pdata = &device->port_data[port]; + + spin_lock_init(&pdata->pkey_list_lock); + INIT_LIST_HEAD(&pdata->pkey_list); + + ret = device->ops.get_port_immutable(device, port, + &pdata->immutable); if (ret) return ret; @@ -507,30 +510,6 @@ void ib_get_device_fw_str(struct ib_device *dev, char *str) } EXPORT_SYMBOL(ib_get_device_fw_str); -static int setup_port_pkey_list(struct ib_device *device) -{ - int i; - - /** - * device->port_pkey_list is indexed directly by the port number, - * Therefore it is declared as a 1 based array with potential empty - * slots at the beginning. - */ - device->port_pkey_list = kcalloc(rdma_end_port(device) + 1, - sizeof(*device->port_pkey_list), - GFP_KERNEL); - - if (!device->port_pkey_list) - return -ENOMEM; - - for (i = 0; i < (rdma_end_port(device) + 1); i++) { - spin_lock_init(&device->port_pkey_list[i].list_lock); - INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list); - } - - return 0; -} - static void ib_policy_change_task(struct work_struct *work) { struct ib_device *dev; @@ -668,10 +647,9 @@ static int setup_device(struct ib_device *device) if (ret) return ret; - ret = read_port_immutable(device); + ret = setup_port_data(device); if (ret) { - dev_warn(&device->dev, - "Couldn't create per port immutable data\n"); + dev_warn(&device->dev, "Couldn't create per-port data\n"); return ret; } @@ -683,12 +661,6 @@ static int setup_device(struct ib_device *device) return ret; } - ret = setup_port_pkey_list(device); - if (ret) { - dev_warn(&device->dev, "Couldn't create per port_pkey_list\n"); - return ret; - } - return 0; } @@ -1221,7 +1193,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, if (!rdma_protocol_ib(device, port)) continue; - for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { + for (i = 0; i < device->port_data[port].immutable.gid_tbl_len; + ++i) { ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) return ret; @@ -1253,7 +1226,8 @@ int ib_find_pkey(struct ib_device *device, u16 tmp_pkey; int partial_ix = -1; - for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { + for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len; + ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 492702b83600..1ab423b19f77 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -49,16 +49,15 @@ static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp) struct pkey_index_qp_list *tmp_pkey; struct ib_device *dev = pp->sec->dev; - spin_lock(&dev->port_pkey_list[pp->port_num].list_lock); - list_for_each_entry(tmp_pkey, - &dev->port_pkey_list[pp->port_num].pkey_list, - pkey_index_list) { + spin_lock(&dev->port_data[pp->port_num].pkey_list_lock); + list_for_each_entry (tmp_pkey, &dev->port_data[pp->port_num].pkey_list, + pkey_index_list) { if (tmp_pkey->pkey_index == pp->pkey_index) { pkey = tmp_pkey; break; } } - spin_unlock(&dev->port_pkey_list[pp->port_num].list_lock); + spin_unlock(&dev->port_data[pp->port_num].pkey_list_lock); return pkey; } @@ -263,12 +262,12 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp) if (!pkey) return -ENOMEM; - spin_lock(&dev->port_pkey_list[port_num].list_lock); + spin_lock(&dev->port_data[port_num].pkey_list_lock); /* Check for the PKey again. A racing process may * have created it. */ list_for_each_entry(tmp_pkey, - &dev->port_pkey_list[port_num].pkey_list, + &dev->port_data[port_num].pkey_list, pkey_index_list) { if (tmp_pkey->pkey_index == pp->pkey_index) { kfree(pkey); @@ -283,9 +282,9 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp) spin_lock_init(&pkey->qp_list_lock); INIT_LIST_HEAD(&pkey->qp_list); list_add(&pkey->pkey_index_list, - &dev->port_pkey_list[port_num].pkey_list); + &dev->port_data[port_num].pkey_list); } - spin_unlock(&dev->port_pkey_list[port_num].list_lock); + spin_unlock(&dev->port_data[port_num].pkey_list_lock); } spin_lock(&pkey->qp_list_lock); @@ -551,9 +550,8 @@ void ib_security_cache_change(struct ib_device *device, { struct pkey_index_qp_list *pkey; - list_for_each_entry(pkey, - &device->port_pkey_list[port_num].pkey_list, - pkey_index_list) { + list_for_each_entry (pkey, &device->port_data[port_num].pkey_list, + pkey_index_list) { check_pkey_qps(pkey, device, port_num, @@ -569,7 +567,7 @@ void ib_security_release_port_pkey_list(struct ib_device *device) rdma_for_each_port (device, i) { list_for_each_entry_safe(pkey, tmp_pkey, - &device->port_pkey_list[i].pkey_list, + &device->port_data[i].pkey_list, pkey_index_list) { list_del(&pkey->pkey_index_list); kfree(pkey); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fa0edd6ae33c..b42e257814f7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2198,6 +2198,13 @@ struct ib_port_immutable { u32 max_mad_size; }; +struct ib_port_data { + struct ib_port_immutable immutable; + + spinlock_t pkey_list_lock; + struct list_head pkey_list; +}; + /* rdma netdev type - specifies protocol type */ enum rdma_netdev_t { RDMA_NETDEV_OPA_VNIC, @@ -2243,12 +2250,6 @@ struct rdma_netdev_alloc_params { struct net_device *netdev, void *param); }; -struct ib_port_pkey_list { - /* Lock to hold while modifying the list. */ - spinlock_t list_lock; - struct list_head pkey_list; -}; - struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; @@ -2549,14 +2550,12 @@ struct ib_device { struct ib_cache cache; /** - * port_immutable is indexed by port number + * port_data is indexed by port number */ - struct ib_port_immutable *port_immutable; + struct ib_port_data *port_data; int num_comp_vectors; - struct ib_port_pkey_list *port_pkey_list; - struct iw_cm_verbs *iwcm; struct module *owner; @@ -2860,34 +2859,38 @@ static inline int rdma_is_port_valid(const struct ib_device *device, static inline bool rdma_is_grh_required(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & - RDMA_CORE_PORT_IB_GRH_REQUIRED; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_PORT_IB_GRH_REQUIRED; } static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_IB; } static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & - (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); + return device->port_data[port_num].immutable.core_cap_flags & + (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_ROCE; } static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_IWARP; } static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) @@ -2898,12 +2901,14 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_RAW_PACKET; } static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_PROT_USNIC; } /** @@ -2920,7 +2925,8 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n */ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_IB_MAD; } /** @@ -2944,8 +2950,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) { - return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD) - == RDMA_CORE_CAP_OPA_MAD; + return (device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD; } /** @@ -2970,7 +2976,8 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_IB_SMI; } /** @@ -2990,7 +2997,8 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_IB_CM; } /** @@ -3007,7 +3015,8 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_IW_CM; } /** @@ -3027,7 +3036,8 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_IB_SA; } /** @@ -3067,7 +3077,8 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num */ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_AF_IB; } /** @@ -3088,7 +3099,8 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_ETH_AH; } /** @@ -3102,7 +3114,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) { - return (device->port_immutable[port_num].core_cap_flags & + return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; } @@ -3120,7 +3132,7 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) */ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].max_mad_size; + return device->port_data[port_num].immutable.max_mad_size; } /** -- cgit v1.2.3-55-g7522 From c2261dd76b549754c14c8ac7cadadd0993b182d6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:50 -0700 Subject: RDMA/device: Add ib_device_set_netdev() as an alternative to get_netdev The associated netdev should not actually be very dynamic, so for most drivers there is no reason for a callback like this. Provide an API to inform the core code about the net dev affiliation and use a core maintained data structure instead. This allows the core code to be more aware of the ndev relationship which will allow some new APIs based around this. This also uses locking that makes some kind of sense, many drivers had a confusing RCU lock, or missing locking which isn't right. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 24 +++--- drivers/infiniband/core/core_priv.h | 3 + drivers/infiniband/core/device.c | 166 ++++++++++++++++++++++++++++++++---- drivers/infiniband/core/nldev.c | 4 +- drivers/infiniband/core/verbs.c | 5 +- include/rdma/ib_verbs.h | 7 ++ 6 files changed, 171 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index a28dc1901c80..43c67e5f43c6 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, unsigned long mask; int ret; - if (ib_dev->ops.get_netdev) { - idev = ib_dev->ops.get_netdev(ib_dev, port); - if (idev && attr->ndev != idev) { - union ib_gid default_gid; - - /* Adding default GIDs in not permitted */ - make_default_gid(idev, &default_gid); - if (!memcmp(gid, &default_gid, sizeof(*gid))) { - dev_put(idev); - return -EPERM; - } - } - if (idev) + idev = ib_device_get_netdev(ib_dev, port); + if (idev && attr->ndev != idev) { + union ib_gid default_gid; + + /* Adding default GIDs is not permitted */ + make_default_gid(idev, &default_gid); + if (!memcmp(gid, &default_gid, sizeof(*gid))) { dev_put(idev); + return -EPERM; + } } + if (idev) + dev_put(idev); mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index eeabe9ca8427..08c690249594 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -66,6 +66,9 @@ typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + unsigned int port); + void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, void *filter_cookie, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8d7d63a60ef5..7680a64a98bc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -134,6 +134,7 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, !xa_is_err(entry); \ (index)++, entry = xan_find_marked(xa, &(index), filter)) +static void free_netdevs(struct ib_device *ib_dev); static int ib_security_change(struct notifier_block *nb, unsigned long event, void *lsm_data); static void ib_policy_change_task(struct work_struct *work); @@ -290,6 +291,7 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); + free_netdevs(dev); WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); @@ -371,6 +373,9 @@ EXPORT_SYMBOL(_ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { + /* Expedite releasing netdev references */ + free_netdevs(device); + WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); @@ -461,16 +466,16 @@ static void remove_client_context(struct ib_device *device, up_read(&device->client_data_rwsem); } -static int verify_immutable(const struct ib_device *dev, u8 port) -{ - return WARN_ON(!rdma_cap_ib_mad(dev, port) && - rdma_max_mad_size(dev, port) != 0); -} - -static int setup_port_data(struct ib_device *device) +static int alloc_port_data(struct ib_device *device) { unsigned int port; - int ret; + + if (device->port_data) + return 0; + + /* This can only be called once the physical port range is defined */ + if (WARN_ON(!device->phys_port_cnt)) + return -EINVAL; /* * device->port_data is indexed directly by the port number to make @@ -489,6 +494,28 @@ static int setup_port_data(struct ib_device *device) spin_lock_init(&pdata->pkey_list_lock); INIT_LIST_HEAD(&pdata->pkey_list); + spin_lock_init(&pdata->netdev_lock); + } + return 0; +} + +static int verify_immutable(const struct ib_device *dev, u8 port) +{ + return WARN_ON(!rdma_cap_ib_mad(dev, port) && + rdma_max_mad_size(dev, port) != 0); +} + +static int setup_port_data(struct ib_device *device) +{ + unsigned int port; + int ret; + + ret = alloc_port_data(device); + if (ret) + return ret; + + rdma_for_each_port (device, port) { + struct ib_port_data *pdata = &device->port_data[port]; ret = device->ops.get_port_immutable(device, port, &pdata->immutable); @@ -682,6 +709,9 @@ static void disable_device(struct ib_device *device) /* Pairs with refcount_set in enable_device */ ib_device_put(device); wait_for_completion(&device->unreg_completion); + + /* Expedite removing unregistered pointers from the hash table */ + free_netdevs(device); } /* @@ -1012,6 +1042,114 @@ int ib_query_port(struct ib_device *device, } EXPORT_SYMBOL(ib_query_port); +/** + * ib_device_set_netdev - Associate the ib_dev with an underlying net_device + * @ib_dev: Device to modify + * @ndev: net_device to affiliate, may be NULL + * @port: IB port the net_device is connected to + * + * Drivers should use this to link the ib_device to a netdev so the netdev + * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be + * affiliated with any port. + * + * The caller must ensure that the given ndev is not unregistered or + * unregistering, and that either the ib_device is unregistered or + * ib_device_set_netdev() is called with NULL when the ndev sends a + * NETDEV_UNREGISTER event. + */ +int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, + unsigned int port) +{ + struct net_device *old_ndev; + struct ib_port_data *pdata; + unsigned long flags; + int ret; + + /* + * Drivers wish to call this before ib_register_driver, so we have to + * setup the port data early. + */ + ret = alloc_port_data(ib_dev); + if (ret) + return ret; + + if (!rdma_is_port_valid(ib_dev, port)) + return -EINVAL; + + pdata = &ib_dev->port_data[port]; + spin_lock_irqsave(&pdata->netdev_lock, flags); + if (pdata->netdev == ndev) { + spin_unlock_irqrestore(&pdata->netdev_lock, flags); + return 0; + } + old_ndev = pdata->netdev; + + if (ndev) + dev_hold(ndev); + pdata->netdev = ndev; + spin_unlock_irqrestore(&pdata->netdev_lock, flags); + + if (old_ndev) + dev_put(old_ndev); + + return 0; +} +EXPORT_SYMBOL(ib_device_set_netdev); + +static void free_netdevs(struct ib_device *ib_dev) +{ + unsigned long flags; + unsigned int port; + + rdma_for_each_port (ib_dev, port) { + struct ib_port_data *pdata = &ib_dev->port_data[port]; + + spin_lock_irqsave(&pdata->netdev_lock, flags); + if (pdata->netdev) { + dev_put(pdata->netdev); + pdata->netdev = NULL; + } + spin_unlock_irqrestore(&pdata->netdev_lock, flags); + } +} + +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + unsigned int port) +{ + struct ib_port_data *pdata; + struct net_device *res; + + if (!rdma_is_port_valid(ib_dev, port)) + return NULL; + + pdata = &ib_dev->port_data[port]; + + /* + * New drivers should use ib_device_set_netdev() not the legacy + * get_netdev(). + */ + if (ib_dev->ops.get_netdev) + res = ib_dev->ops.get_netdev(ib_dev, port); + else { + spin_lock(&pdata->netdev_lock); + res = pdata->netdev; + if (res) + dev_hold(res); + spin_unlock(&pdata->netdev_lock); + } + + /* + * If we are starting to unregister expedite things by preventing + * propagation of an unregistering netdev. + */ + if (res && res->reg_state != NETREG_REGISTERED) { + dev_put(res); + return NULL; + } + + return res; +} + /** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query @@ -1034,16 +1172,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, rdma_for_each_port (ib_dev, port) if (rdma_protocol_roce(ib_dev, port)) { - struct net_device *idev = NULL; - - if (ib_dev->ops.get_netdev) - idev = ib_dev->ops.get_netdev(ib_dev, port); - - if (idev && - idev->reg_state >= NETREG_UNREGISTERED) { - dev_put(idev); - idev = NULL; - } + struct net_device *idev = + ib_device_get_netdev(ib_dev, port); if (filter(ib_dev, port, idev, filter_cookie)) cb(ib_dev, port, idev, cookie); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 85f6f2bcce40..1980ddc5f7bc 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -268,9 +268,7 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) return -EMSGSIZE; - if (device->ops.get_netdev) - netdev = device->ops.get_netdev(device, port); - + netdev = ib_device_get_netdev(device, port); if (netdev && net_eq(dev_net(netdev), net)) { ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index de5d895a5054..5a5e83f5f0fc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1723,10 +1723,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; - if (!dev->ops.get_netdev) - return -EOPNOTSUPP; - - netdev = dev->ops.get_netdev(dev, port_num); + netdev = ib_device_get_netdev(dev, port_num); if (!netdev) return -ENODEV; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 50b7ebc2885e..7f81a313c01b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2204,6 +2204,9 @@ struct ib_port_data { struct list_head pkey_list; struct ib_port_cache cache; + + spinlock_t netdev_lock; + struct net_device *netdev; }; /* rdma netdev type - specifies protocol type */ @@ -3996,6 +3999,10 @@ void ib_device_put(struct ib_device *device); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); +int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, + unsigned int port); +struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); + struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq(struct ib_wq *wq); -- cgit v1.2.3-55-g7522 From 324e227ea7c952626abafe72db42ae0d70220a6e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:51 -0700 Subject: RDMA/device: Add ib_device_get_by_netdev() Several drivers need to find the ib_device from a given netdev. rxe needs this at speed in an unsleepable context, so choose to implement the translation using a RCU safe hash table. The hash table can have a many to one mapping. This is intended to support some future case where multiple IB drivers (ie iWarp and RoCE) connect to the same netdevs. driver_ids will need to be different to support this. In the process this makes the struct ib_device and ib_port_data RCU safe by deferring their kfrees. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 119 +++++++++++++++++++++++++++++++++++---- include/rdma/ib_verbs.h | 10 +++- 2 files changed, 116 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7680a64a98bc..f6795ad7ca98 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -134,6 +135,10 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp, !xa_is_err(entry); \ (index)++, entry = xan_find_marked(xa, &(index), filter)) +/* RCU hash table mapping netdevice pointers to struct ib_port_data */ +static DEFINE_SPINLOCK(ndev_hash_lock); +static DECLARE_HASHTABLE(ndev_hash, 5); + static void free_netdevs(struct ib_device *ib_dev); static int ib_security_change(struct notifier_block *nb, unsigned long event, void *lsm_data); @@ -144,6 +149,12 @@ static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; +/* Pointer to the RCU head at the start of the ib_port_data array */ +struct ib_port_data_rcu { + struct rcu_head rcu_head; + struct ib_port_data pdata[]; +}; + static int ib_device_check_mandatory(struct ib_device *device) { #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } @@ -295,9 +306,12 @@ static void ib_device_release(struct device *device) WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); - kfree(dev->port_data); xa_destroy(&dev->client_data); - kfree(dev); + if (dev->port_data) + kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, + pdata[0]), + rcu_head); + kfree_rcu(dev, rcu_head); } static int ib_device_uevent(struct device *device, @@ -468,6 +482,7 @@ static void remove_client_context(struct ib_device *device, static int alloc_port_data(struct ib_device *device) { + struct ib_port_data_rcu *pdata_rcu; unsigned int port; if (device->port_data) @@ -484,17 +499,26 @@ static int alloc_port_data(struct ib_device *device) * Therefore port_data is declared as a 1 based array with potential * empty slots at the beginning. */ - device->port_data = kcalloc(rdma_end_port(device) + 1, - sizeof(*device->port_data), GFP_KERNEL); - if (!device->port_data) + pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata, + rdma_end_port(device) + 1), + GFP_KERNEL); + if (!pdata_rcu) return -ENOMEM; + /* + * The rcu_head is put in front of the port data array and the stored + * pointer is adjusted since we never need to see that member until + * kfree_rcu. + */ + device->port_data = pdata_rcu->pdata; rdma_for_each_port (device, port) { struct ib_port_data *pdata = &device->port_data[port]; + pdata->ib_dev = device; spin_lock_init(&pdata->pkey_list_lock); INIT_LIST_HEAD(&pdata->pkey_list); spin_lock_init(&pdata->netdev_lock); + INIT_HLIST_NODE(&pdata->ndev_hash_link); } return 0; } @@ -1042,6 +1066,29 @@ int ib_query_port(struct ib_device *device, } EXPORT_SYMBOL(ib_query_port); +static void add_ndev_hash(struct ib_port_data *pdata) +{ + unsigned long flags; + + might_sleep(); + + spin_lock_irqsave(&ndev_hash_lock, flags); + if (hash_hashed(&pdata->ndev_hash_link)) { + hash_del_rcu(&pdata->ndev_hash_link); + spin_unlock_irqrestore(&ndev_hash_lock, flags); + /* + * We cannot do hash_add_rcu after a hash_del_rcu until the + * grace period + */ + synchronize_rcu(); + spin_lock_irqsave(&ndev_hash_lock, flags); + } + if (pdata->netdev) + hash_add_rcu(ndev_hash, &pdata->ndev_hash_link, + (uintptr_t)pdata->netdev); + spin_unlock_irqrestore(&ndev_hash_lock, flags); +} + /** * ib_device_set_netdev - Associate the ib_dev with an underlying net_device * @ib_dev: Device to modify @@ -1078,17 +1125,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, pdata = &ib_dev->port_data[port]; spin_lock_irqsave(&pdata->netdev_lock, flags); - if (pdata->netdev == ndev) { + old_ndev = rcu_dereference_protected( + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); + if (old_ndev == ndev) { spin_unlock_irqrestore(&pdata->netdev_lock, flags); return 0; } - old_ndev = pdata->netdev; if (ndev) dev_hold(ndev); - pdata->netdev = ndev; + rcu_assign_pointer(pdata->netdev, ndev); spin_unlock_irqrestore(&pdata->netdev_lock, flags); + add_ndev_hash(pdata); if (old_ndev) dev_put(old_ndev); @@ -1103,11 +1152,24 @@ static void free_netdevs(struct ib_device *ib_dev) rdma_for_each_port (ib_dev, port) { struct ib_port_data *pdata = &ib_dev->port_data[port]; + struct net_device *ndev; spin_lock_irqsave(&pdata->netdev_lock, flags); - if (pdata->netdev) { - dev_put(pdata->netdev); - pdata->netdev = NULL; + ndev = rcu_dereference_protected( + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); + if (ndev) { + spin_lock(&ndev_hash_lock); + hash_del_rcu(&pdata->ndev_hash_link); + spin_unlock(&ndev_hash_lock); + + /* + * If this is the last dev_put there is still a + * synchronize_rcu before the netdev is kfreed, so we + * can continue to rely on unlocked pointer + * comparisons after the put + */ + rcu_assign_pointer(pdata->netdev, NULL); + dev_put(ndev); } spin_unlock_irqrestore(&pdata->netdev_lock, flags); } @@ -1132,7 +1194,8 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, res = ib_dev->ops.get_netdev(ib_dev, port); else { spin_lock(&pdata->netdev_lock); - res = pdata->netdev; + res = rcu_dereference_protected( + pdata->netdev, lockdep_is_held(&pdata->netdev_lock)); if (res) dev_hold(res); spin_unlock(&pdata->netdev_lock); @@ -1150,6 +1213,38 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, return res; } +/** + * ib_device_get_by_netdev - Find an IB device associated with a netdev + * @ndev: netdev to locate + * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) + * + * Find and hold an ib_device that is associated with a netdev via + * ib_device_set_netdev(). The caller must call ib_device_put() on the + * returned pointer. + */ +struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, + enum rdma_driver_id driver_id) +{ + struct ib_device *res = NULL; + struct ib_port_data *cur; + + rcu_read_lock(); + hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link, + (uintptr_t)ndev) { + if (rcu_access_pointer(cur->netdev) == ndev && + (driver_id == RDMA_DRIVER_UNKNOWN || + cur->ib_dev->driver_id == driver_id) && + ib_device_try_get(cur->ib_dev)) { + res = cur->ib_dev; + break; + } + } + rcu_read_unlock(); + + return res; +} +EXPORT_SYMBOL(ib_device_get_by_netdev); + /** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7f81a313c01b..3aa802b65cf3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2198,6 +2198,8 @@ struct ib_port_immutable { }; struct ib_port_data { + struct ib_device *ib_dev; + struct ib_port_immutable immutable; spinlock_t pkey_list_lock; @@ -2206,7 +2208,8 @@ struct ib_port_data { struct ib_port_cache cache; spinlock_t netdev_lock; - struct net_device *netdev; + struct net_device __rcu *netdev; + struct hlist_node ndev_hash_link; }; /* rdma netdev type - specifies protocol type */ @@ -2545,6 +2548,7 @@ struct ib_device { struct device *dma_device; struct ib_device_ops ops; char name[IB_DEVICE_NAME_MAX]; + struct rcu_head rcu_head; struct list_head event_handler_list; spinlock_t event_handler_lock; @@ -3996,6 +4000,10 @@ static inline bool ib_device_try_get(struct ib_device *dev) } void ib_device_put(struct ib_device *device); +struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, + enum rdma_driver_id driver_id); +struct ib_device *ib_device_get_by_name(const char *name, + enum rdma_driver_id driver_id); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); -- cgit v1.2.3-55-g7522 From d0899892edd089790eb17943ecf28254a909deae Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:53 -0700 Subject: RDMA/device: Provide APIs from the core code to help unregistration These APIs are intended to support drivers that exist outside the usual driver core probe()/remove() callbacks. Normally the driver core will prevent remove() from running concurrently with probe(), once this safety is lost drivers need more support to get the locking and lifetimes right. ib_unregister_driver() is intended to be used during module_exit of a driver using these APIs. It unregisters all the associated ib_devices. ib_unregister_device_and_put() is to be used by a driver-specific removal function (ie removal by name, removal from a netdev notifier, removal from netlink) ib_unregister_queued() is to be used from netdev notifier chains where RTNL is held. The locking is tricky here since once things become async it is possible to race unregister with registration. This is largely solved by relying on the registration refcount, unregistration will only ever work on something that has a positive registration refcount - and then an unregistration mutex serializes all competing unregistrations of the same device. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 242 +++++++++++++++++++++++++++++++++------ include/rdma/ib_verbs.h | 11 ++ 2 files changed, 217 insertions(+), 36 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f6795ad7ca98..e470fa651961 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -140,6 +140,8 @@ static DEFINE_SPINLOCK(ndev_hash_lock); static DECLARE_HASHTABLE(ndev_hash, 5); static void free_netdevs(struct ib_device *ib_dev); +static void ib_unregister_work(struct work_struct *work); +static void __ib_unregister_device(struct ib_device *device); static int ib_security_change(struct notifier_block *nb, unsigned long event, void *lsm_data); static void ib_policy_change_task(struct work_struct *work); @@ -366,6 +368,7 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); + mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be * destroyed if the user stores NULL in the client data. @@ -374,6 +377,7 @@ struct ib_device *_ib_alloc_device(size_t size) init_rwsem(&device->client_data_rwsem); INIT_LIST_HEAD(&device->port_list); init_completion(&device->unreg_completion); + INIT_WORK(&device->unregistration_work, ib_unregister_work); return device; } @@ -387,6 +391,20 @@ EXPORT_SYMBOL(_ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { + if (device->ops.dealloc_driver) + device->ops.dealloc_driver(device); + + /* + * ib_unregister_driver() requires all devices to remain in the xarray + * while their ops are callable. The last op we call is dealloc_driver + * above. This is needed to create a fence on op callbacks prior to + * allowing the driver module to unload. + */ + down_write(&devices_rwsem); + if (xa_load(&devices, device->index) == device) + xa_erase(&devices, device->index); + up_write(&devices_rwsem); + /* Expedite releasing netdev references */ free_netdevs(device); @@ -599,7 +617,8 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, } /* - * Assign the unique string device name and the unique device index. + * Assign the unique string device name and the unique device index. This is + * undone by ib_dealloc_device. */ static int assign_name(struct ib_device *device, const char *name) { @@ -640,13 +659,6 @@ out: return ret; } -static void release_name(struct ib_device *device) -{ - down_write(&devices_rwsem); - xa_erase(&devices, device->index); - up_write(&devices_rwsem); -} - static void setup_dma_device(struct ib_device *device) { struct device *parent = device->dev.parent; @@ -740,30 +752,38 @@ static void disable_device(struct ib_device *device) /* * An enabled device is visible to all clients and to all the public facing - * APIs that return a device pointer. + * APIs that return a device pointer. This always returns with a new get, even + * if it fails. */ -static int enable_device(struct ib_device *device) +static int enable_device_and_get(struct ib_device *device) { struct ib_client *client; unsigned long index; - int ret; + int ret = 0; - refcount_set(&device->refcount, 1); + /* + * One ref belongs to the xa and the other belongs to this + * thread. This is needed to guard against parallel unregistration. + */ + refcount_set(&device->refcount, 2); down_write(&devices_rwsem); xa_set_mark(&devices, device->index, DEVICE_REGISTERED); - up_write(&devices_rwsem); + + /* + * By using downgrade_write() we ensure that no other thread can clear + * DEVICE_REGISTERED while we are completing the client setup. + */ + downgrade_write(&devices_rwsem); down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); - if (ret) { - up_read(&clients_rwsem); - disable_device(device); - return ret; - } + if (ret) + break; } up_read(&clients_rwsem); - return 0; + up_read(&devices_rwsem); + return ret; } /** @@ -774,6 +794,10 @@ static int enable_device(struct ib_device *device) * devices with the IB core. All registered clients will receive a * callback for each device that is added. @device must be allocated * with ib_alloc_device(). + * + * If the driver uses ops.dealloc_driver and calls any ib_unregister_device() + * asynchronously then the device pointer may become freed as soon as this + * function returns. */ int ib_register_device(struct ib_device *device, const char *name) { @@ -785,13 +809,13 @@ int ib_register_device(struct ib_device *device, const char *name) ret = setup_device(device); if (ret) - goto out; + return ret; ret = ib_cache_setup_one(device); if (ret) { dev_warn(&device->dev, "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out; + return ret; } ib_device_register_rdmacg(device); @@ -807,42 +831,186 @@ int ib_register_device(struct ib_device *device, const char *name) goto dev_cleanup; } - ret = enable_device(device); - if (ret) - goto sysfs_cleanup; + ret = enable_device_and_get(device); + if (ret) { + void (*dealloc_fn)(struct ib_device *); + + /* + * If we hit this error flow then we don't want to + * automatically dealloc the device since the caller is + * expected to call ib_dealloc_device() after + * ib_register_device() fails. This is tricky due to the + * possibility for a parallel unregistration along with this + * error flow. Since we have a refcount here we know any + * parallel flow is stopped in disable_device and will see the + * NULL pointers, causing the responsibility to + * ib_dealloc_device() to revert back to this thread. + */ + dealloc_fn = device->ops.dealloc_driver; + device->ops.dealloc_driver = NULL; + ib_device_put(device); + __ib_unregister_device(device); + device->ops.dealloc_driver = dealloc_fn; + return ret; + } + ib_device_put(device); return 0; -sysfs_cleanup: - ib_device_unregister_sysfs(device); dev_cleanup: device_del(&device->dev); cg_cleanup: ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); -out: - release_name(device); return ret; } EXPORT_SYMBOL(ib_register_device); +/* Callers must hold a get on the device. */ +static void __ib_unregister_device(struct ib_device *ib_dev) +{ + /* + * We have a registration lock so that all the calls to unregister are + * fully fenced, once any unregister returns the device is truely + * unregistered even if multiple callers are unregistering it at the + * same time. This also interacts with the registration flow and + * provides sane semantics if register and unregister are racing. + */ + mutex_lock(&ib_dev->unregistration_lock); + if (!refcount_read(&ib_dev->refcount)) + goto out; + + disable_device(ib_dev); + ib_device_unregister_sysfs(ib_dev); + device_del(&ib_dev->dev); + ib_device_unregister_rdmacg(ib_dev); + ib_cache_cleanup_one(ib_dev); + + /* + * Drivers using the new flow may not call ib_dealloc_device except + * in error unwind prior to registration success. + */ + if (ib_dev->ops.dealloc_driver) { + WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); + ib_dealloc_device(ib_dev); + } +out: + mutex_unlock(&ib_dev->unregistration_lock); +} + /** * ib_unregister_device - Unregister an IB device - * @device:Device to unregister + * @device: The device to unregister * * Unregister an IB device. All clients will receive a remove callback. + * + * Callers should call this routine only once, and protect against races with + * registration. Typically it should only be called as part of a remove + * callback in an implementation of driver core's struct device_driver and + * related. + * + * If ops.dealloc_driver is used then ib_dev will be freed upon return from + * this function. */ -void ib_unregister_device(struct ib_device *device) +void ib_unregister_device(struct ib_device *ib_dev) { - disable_device(device); - ib_device_unregister_sysfs(device); - device_del(&device->dev); - ib_device_unregister_rdmacg(device); - ib_cache_cleanup_one(device); - release_name(device); + get_device(&ib_dev->dev); + __ib_unregister_device(ib_dev); + put_device(&ib_dev->dev); } EXPORT_SYMBOL(ib_unregister_device); +/** + * ib_unregister_device_and_put - Unregister a device while holding a 'get' + * device: The device to unregister + * + * This is the same as ib_unregister_device(), except it includes an internal + * ib_device_put() that should match a 'get' obtained by the caller. + * + * It is safe to call this routine concurrently from multiple threads while + * holding the 'get'. When the function returns the device is fully + * unregistered. + * + * Drivers using this flow MUST use the driver_unregister callback to clean up + * their resources associated with the device and dealloc it. + */ +void ib_unregister_device_and_put(struct ib_device *ib_dev) +{ + WARN_ON(!ib_dev->ops.dealloc_driver); + get_device(&ib_dev->dev); + ib_device_put(ib_dev); + __ib_unregister_device(ib_dev); + put_device(&ib_dev->dev); +} +EXPORT_SYMBOL(ib_unregister_device_and_put); + +/** + * ib_unregister_driver - Unregister all IB devices for a driver + * @driver_id: The driver to unregister + * + * This implements a fence for device unregistration. It only returns once all + * devices associated with the driver_id have fully completed their + * unregistration and returned from ib_unregister_device*(). + * + * If device's are not yet unregistered it goes ahead and starts unregistering + * them. + * + * This does not block creation of new devices with the given driver_id, that + * is the responsibility of the caller. + */ +void ib_unregister_driver(enum rdma_driver_id driver_id) +{ + struct ib_device *ib_dev; + unsigned long index; + + down_read(&devices_rwsem); + xa_for_each (&devices, index, ib_dev) { + if (ib_dev->driver_id != driver_id) + continue; + + get_device(&ib_dev->dev); + up_read(&devices_rwsem); + + WARN_ON(!ib_dev->ops.dealloc_driver); + __ib_unregister_device(ib_dev); + + put_device(&ib_dev->dev); + down_read(&devices_rwsem); + } + up_read(&devices_rwsem); +} +EXPORT_SYMBOL(ib_unregister_driver); + +static void ib_unregister_work(struct work_struct *work) +{ + struct ib_device *ib_dev = + container_of(work, struct ib_device, unregistration_work); + + __ib_unregister_device(ib_dev); + put_device(&ib_dev->dev); +} + +/** + * ib_unregister_device_queued - Unregister a device using a work queue + * device: The device to unregister + * + * This schedules an asynchronous unregistration using a WQ for the device. A + * driver should use this to avoid holding locks while doing unregistration, + * such as holding the RTNL lock. + * + * Drivers using this API must use ib_unregister_driver before module unload + * to ensure that all scheduled unregistrations have completed. + */ +void ib_unregister_device_queued(struct ib_device *ib_dev) +{ + WARN_ON(!refcount_read(&ib_dev->refcount)); + WARN_ON(!ib_dev->ops.dealloc_driver); + get_device(&ib_dev->dev); + if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) + put_device(&ib_dev->dev); +} +EXPORT_SYMBOL(ib_unregister_device_queued); + static int assign_client_id(struct ib_client *client) { int ret; @@ -1558,6 +1726,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_srq); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); + SET_DEVICE_OP(dev_ops, dealloc_driver); SET_DEVICE_OP(dev_ops, dealloc_fmr); SET_DEVICE_OP(dev_ops, dealloc_mw); SET_DEVICE_OP(dev_ops, dealloc_pd); @@ -1744,6 +1913,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); + flush_workqueue(system_unbound_wq); WARN_ON(!xa_empty(&clients)); WARN_ON(!xa_empty(&devices)); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3aa802b65cf3..ad83f8c38dc8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2538,6 +2538,12 @@ struct ib_device_ops { int (*fill_res_entry)(struct sk_buff *msg, struct rdma_restrack_entry *entry); + /* Device lifecycle callbacks */ + /* + * This is called as part of ib_dealloc_device(). + */ + void (*dealloc_driver)(struct ib_device *dev); + DECLARE_RDMA_OBJ_SIZE(ib_pd); }; @@ -2555,6 +2561,7 @@ struct ib_device { struct rw_semaphore client_data_rwsem; struct xarray client_data; + struct mutex unregistration_lock; struct ib_cache cache; /** @@ -2609,6 +2616,7 @@ struct ib_device { */ refcount_t refcount; struct completion unreg_completion; + struct work_struct unregistration_work; }; struct ib_client { @@ -2658,6 +2666,9 @@ void ib_get_device_fw_str(struct ib_device *device, char *str); int ib_register_device(struct ib_device *device, const char *name); void ib_unregister_device(struct ib_device *device); +void ib_unregister_driver(enum rdma_driver_id driver_id); +void ib_unregister_device_and_put(struct ib_device *device); +void ib_unregister_device_queued(struct ib_device *ib_dev); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); -- cgit v1.2.3-55-g7522 From 6cc2c8e535ec19153714cee62e11e4d9ac2ea953 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:55 -0700 Subject: RDMA/rxe: Add ib_device_get_by_name() and use it in rxe rxe has an open coded version of this that is not as safe as the core version. This lets us eliminate the internal device list entirely from rxe. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 28 ++++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe.c | 3 --- drivers/infiniband/sw/rxe/rxe.h | 2 -- drivers/infiniband/sw/rxe/rxe_net.c | 22 ---------------------- drivers/infiniband/sw/rxe/rxe_sysfs.c | 9 ++++----- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 6 files changed, 32 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e470fa651961..2a7d54794ee3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -240,6 +240,34 @@ static struct ib_device *__ib_device_get_by_name(const char *name) return NULL; } +/** + * ib_device_get_by_name - Find an IB device by name + * @name: The name to look for + * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all) + * + * Find and hold an ib_device by its name. The caller must call + * ib_device_put() on the returned pointer. + */ +struct ib_device *ib_device_get_by_name(const char *name, + enum rdma_driver_id driver_id) +{ + struct ib_device *device; + + down_read(&devices_rwsem); + device = __ib_device_get_by_name(name); + if (device && driver_id != RDMA_DRIVER_UNKNOWN && + device->driver_id != driver_id) + device = NULL; + + if (device) { + if (!ib_device_try_get(device)) + device = NULL; + } + up_read(&devices_rwsem); + return device; +} +EXPORT_SYMBOL(ib_device_get_by_name); + int ib_device_rename(struct ib_device *ibdev, const char *name) { int ret; diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index cd93c546c008..08a45ce9cab5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -69,8 +69,6 @@ void rxe_dealloc(struct ib_device *ib_dev) if (rxe->tfm) crypto_free_shash(rxe->tfm); - - list_del(&rxe->list); } /* initialize rxe device parameters */ @@ -275,7 +273,6 @@ static int rxe_init(struct rxe_dev *rxe) spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); - INIT_LIST_HEAD(&rxe->list); mutex_init(&rxe->usdev_lock); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index ce4bf05cd855..63b3b89b894a 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -99,8 +99,6 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu); void rxe_rcv(struct sk_buff *skb); -struct rxe_dev *get_rxe_by_name(const char *name); - /* The caller must do a matching ib_device_put(&dev->ib_dev) */ static inline struct rxe_dev *rxe_get_dev_from_net(struct net_device *ndev) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index d56a967ff90b..d6dfbcf6a47e 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -45,25 +45,6 @@ #include "rxe_net.h" #include "rxe_loc.h" -static LIST_HEAD(rxe_dev_list); -static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */ - -struct rxe_dev *get_rxe_by_name(const char *name) -{ - struct rxe_dev *rxe; - struct rxe_dev *found = NULL; - - spin_lock_bh(&dev_list_lock); - list_for_each_entry(rxe, &rxe_dev_list, list) { - if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) { - found = rxe; - break; - } - } - spin_unlock_bh(&dev_list_lock); - return found; -} - static struct rxe_recv_sockets recv_sockets; struct device *rxe_dma_device(struct rxe_dev *rxe) @@ -553,9 +534,6 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) return NULL; } - spin_lock_bh(&dev_list_lock); - list_add_tail(&rxe->list, &rxe_dev_list); - spin_unlock_bh(&dev_list_lock); return rxe; } diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index c8630638cd57..d51b55b0a311 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -101,7 +101,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) { int len; char intf[32]; - struct rxe_dev *rxe; + struct ib_device *ib_dev; len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { @@ -115,14 +115,13 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) return 0; } - rxe = get_rxe_by_name(intf); - - if (!rxe) { + ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE); + if (!ib_dev) { pr_err("not configured on %s\n", intf); return -EINVAL; } - ib_unregister_device(&rxe->ib_dev); + ib_unregister_device_and_put(ib_dev); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ad5574d8e8ba..a22822526a62 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -411,7 +411,6 @@ struct rxe_dev { atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; - struct list_head list; struct crypto_shash *tfm; }; -- cgit v1.2.3-55-g7522 From ca22354b140853b8155692d5b2bc0110aa54e937 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Feb 2019 21:12:56 -0700 Subject: RDMA/rxe: Close a race after ib_register_device Since rxe allows unregistration from other threads the rxe pointer can become invalid any moment after ib_register_driver returns. This could cause a user triggered use after free. Add another driver callback to be called right after the device becomes registered to complete any device setup required post-registration. This callback has enough core locking to prevent the device from becoming unregistered. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 9 +++++++++ drivers/infiniband/sw/rxe/rxe_net.c | 8 ++++---- drivers/infiniband/sw/rxe/rxe_net.h | 2 +- drivers/infiniband/sw/rxe/rxe_sysfs.c | 9 ++------- drivers/infiniband/sw/rxe/rxe_verbs.c | 14 ++++++++++++++ include/rdma/ib_verbs.h | 5 +++++ 6 files changed, 35 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2a7d54794ee3..bf2a215d94dd 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -803,6 +803,12 @@ static int enable_device_and_get(struct ib_device *device) */ downgrade_write(&devices_rwsem); + if (device->ops.enable_driver) { + ret = device->ops.enable_driver(device); + if (ret) + goto out; + } + down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); @@ -810,6 +816,8 @@ static int enable_device_and_get(struct ib_device *device) break; } up_read(&clients_rwsem); + +out: up_read(&devices_rwsem); return ret; } @@ -1775,6 +1783,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, disassociate_ucontext); SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index d6dfbcf6a47e..fb792f5bc0b7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -517,24 +517,24 @@ enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) return IB_LINK_LAYER_ETHERNET; } -struct rxe_dev *rxe_net_add(struct net_device *ndev) +int rxe_net_add(struct net_device *ndev) { int err; struct rxe_dev *rxe = NULL; rxe = ib_alloc_device(rxe_dev, ib_dev); if (!rxe) - return NULL; + return -ENOMEM; rxe->ndev = ndev; err = rxe_add(rxe, ndev->mtu); if (err) { ib_dealloc_device(&rxe->ib_dev); - return NULL; + return err; } - return rxe; + return 0; } static void rxe_port_event(struct rxe_dev *rxe, diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 106c586dbb26..ad79514191bb 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -43,7 +43,7 @@ struct rxe_recv_sockets { struct socket *sk6; }; -struct rxe_dev *rxe_net_add(struct net_device *ndev); +int rxe_net_add(struct net_device *ndev); int rxe_net_init(void); void rxe_net_exit(void); diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index d51b55b0a311..46587eb0da0e 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -60,7 +60,6 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) char intf[32]; struct net_device *ndev; struct rxe_dev *exists; - struct rxe_dev *rxe; len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { @@ -82,16 +81,12 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) goto err; } - rxe = rxe_net_add(ndev); - if (!rxe) { + err = rxe_net_add(ndev); + if (err) { pr_err("failed to add %s\n", intf); - err = -EINVAL; goto err; } - rxe_set_port_state(rxe); - dev_info(&rxe->ib_dev.dev, "added %s\n", intf); - err: dev_put(ndev); return err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 76da8a142bf7..79ad93b4140c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1125,6 +1125,15 @@ static const struct attribute_group rxe_attr_group = { .attrs = rxe_dev_attributes, }; +static int rxe_enable_driver(struct ib_device *ib_dev) +{ + struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); + + rxe_set_port_state(rxe); + dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev)); + return 0; +} + static const struct ib_device_ops rxe_dev_ops = { .alloc_hw_stats = rxe_ib_alloc_hw_stats, .alloc_mr = rxe_alloc_mr, @@ -1144,6 +1153,7 @@ static const struct ib_device_ops rxe_dev_ops = { .destroy_qp = rxe_destroy_qp, .destroy_srq = rxe_destroy_srq, .detach_mcast = rxe_detach_mcast, + .enable_driver = rxe_enable_driver, .get_dma_mr = rxe_get_dma_mr, .get_hw_stats = rxe_ib_get_hw_stats, .get_link_layer = rxe_get_link_layer, @@ -1245,5 +1255,9 @@ int rxe_register_device(struct rxe_dev *rxe) if (err) pr_warn("%s failed with error %d\n", __func__, err); + /* + * Note that rxe may be invalid at this point if another thread + * unregistered it. + */ return err; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ad83f8c38dc8..640263289ab9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2539,6 +2539,11 @@ struct ib_device_ops { struct rdma_restrack_entry *entry); /* Device lifecycle callbacks */ + /* + * Called after the device becomes registered, before clients are + * attached + */ + int (*enable_driver)(struct ib_device *dev); /* * This is called as part of ib_dealloc_device(). */ -- cgit v1.2.3-55-g7522 From a2a074ef396f8738d9ee08ceefa8811381a4fe4f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Feb 2019 20:39:16 +0200 Subject: RDMA: Handle ucontext allocations by IB/core Following the PD conversion patch, do the same for ucontext allocations. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/rdma_core.c | 9 +---- drivers/infiniband/core/uverbs_cmd.c | 24 +++++++----- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 32 +++++----------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 7 ++-- drivers/infiniband/hw/bnxt_re/main.c | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 17 ++++----- drivers/infiniband/hw/cxgb4/provider.c | 26 +++++-------- drivers/infiniband/hw/hns/hns_roce_main.c | 26 +++++-------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 50 +++++++------------------ drivers/infiniband/hw/mlx4/main.c | 30 ++++++--------- drivers/infiniband/hw/mlx5/main.c | 35 +++++++---------- drivers/infiniband/hw/mthca/mthca_provider.c | 39 +++++++------------ drivers/infiniband/hw/nes/nes_verbs.c | 32 ++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 38 +++++++------------ drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 5 +-- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/verbs.c | 34 +++++------------ drivers/infiniband/hw/qedr/verbs.h | 4 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 18 +++------ drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 5 +-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 1 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 50 ++++++++----------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 5 +-- drivers/infiniband/sw/rdmavt/vt.c | 22 ++++------- drivers/infiniband/sw/rxe/rxe_pool.c | 1 + drivers/infiniband/sw/rxe/rxe_verbs.c | 14 +++---- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- include/rdma/ib_verbs.h | 7 ++-- 31 files changed, 198 insertions(+), 340 deletions(-) (limited to 'drivers/infiniband/core/device.c') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index bf2a215d94dd..a9f29156e486 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1832,6 +1832,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, unmap_fmr); SET_OBJ_SIZE(dev_ops, ib_pd); + SET_OBJ_SIZE(dev_ops, ib_ucontext); } EXPORT_SYMBOL(ib_set_device_ops); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 96f919fe86e7..778375ff664e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -844,7 +844,6 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, { struct ib_ucontext *ucontext = ufile->ucontext; struct ib_device *ib_dev = ucontext->device; - int ret; /* * If we are closing the FD then the user mmap VMAs must have @@ -862,12 +861,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, rdma_restrack_del(&ucontext->res); - /* - * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove - * the error return. - */ - ret = ib_dev->ops.dealloc_ucontext(ucontext); - WARN_ON(ret); + ib_dev->ops.dealloc_ucontext(ucontext); + kfree(ucontext); ufile->ucontext = NULL; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e44ac718f1cd..3128821ca36e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -224,12 +224,13 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) if (ret) goto err; - ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata); - if (IS_ERR(ucontext)) { - ret = PTR_ERR(ucontext); + ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext); + if (!ucontext) { + ret = -ENOMEM; goto err_alloc; } + ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; ucontext->cg_obj = cg_obj; /* ufile is required when some objects are released */ @@ -240,10 +241,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) mutex_init(&ucontext->per_mm_list_lock); INIT_LIST_HEAD(&ucontext->per_mm_list); - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; - - resp.num_comp_vectors = file->device->num_comp_vectors; ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) @@ -256,15 +253,22 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) goto err_fd; } + resp.num_comp_vectors = file->device->num_comp_vectors; + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_file; - fd_install(resp.async_fd, filp); + ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata); + if (ret) + goto err_file; + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + ucontext->invalidate_range = NULL; - ucontext->res.type = RDMA_RESTRACK_CTX; rdma_restrack_uadd(&ucontext->res); + fd_install(resp.async_fd, filp); + /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update * only after all writes to setup the ucontext have completed @@ -283,7 +287,7 @@ err_fd: put_unused_fd(resp.async_fd); err_free: - ib_dev->ops.dealloc_ucontext(ucontext); + kfree(ucontext); err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f29f29aae537..24092911c2ac 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3671,13 +3671,14 @@ free_mr: return ERR_PTR(rc); } -struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) { + struct ib_device *ibdev = ctx->device; + struct bnxt_re_ucontext *uctx = + container_of(ctx, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_uctx_resp resp; - struct bnxt_re_ucontext *uctx; u32 chip_met_rev_num = 0; int rc; @@ -3687,13 +3688,9 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) { dev_dbg(rdev_to_dev(rdev), " is different from the device %d ", BNXT_RE_ABI_VERSION); - return ERR_PTR(-EPERM); + return -EPERM; } - uctx = kzalloc(sizeof(*uctx), GFP_KERNEL); - if (!uctx) - return ERR_PTR(-ENOMEM); - uctx->rdev = rdev; uctx->shpg = (void *)__get_free_page(GFP_KERNEL); @@ -3727,23 +3724,21 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, goto cfail; } - return &uctx->ib_uctx; + return 0; cfail: free_page((unsigned long)uctx->shpg); uctx->shpg = NULL; fail: - kfree(uctx); - return ERR_PTR(rc); + return rc; } -int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) +void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) { struct bnxt_re_ucontext *uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_dev *rdev = uctx->rdev; - int rc = 0; if (uctx->shpg) free_page((unsigned long)uctx->shpg); @@ -3752,17 +3747,10 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) /* Free DPI only if this is the first PD allocated by the * application and mark the context dpi as NULL */ - rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res, - &rdev->qplib_res.dpi_tbl, - &uctx->dpi); - if (rc) - dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!"); - /* Don't fail, continue*/ + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, + &rdev->qplib_res.dpi_tbl, &uctx->dpi); uctx->dpi.dbr = NULL; } - - kfree(uctx); - return 0; } /* Helper function to mmap the virtual memory from user app */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index c7cca803cfa3..e45465ed4eee 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -135,8 +135,8 @@ struct bnxt_re_mw { }; struct bnxt_re_ucontext { + struct ib_ucontext ib_uctx; struct bnxt_re_dev *rdev; - struct ib_ucontext ib_uctx; struct bnxt_qplib_dpi dpi; void *shpg; spinlock_t sh_lock; /* protect shpg */ @@ -215,9 +215,8 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw); struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); -struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata); -int bnxt_re_dealloc_ucontext(struct ib_ucontext *context); +int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata); +void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0a89ef6e5754..2bd24ac45ee4 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -638,6 +638,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), }; static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b74fd90a22dc..4accf7b3dcf2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -62,7 +62,7 @@ #include #include "common.h" -static int iwch_dealloc_ucontext(struct ib_ucontext *context) +static void iwch_dealloc_ucontext(struct ib_ucontext *context) { struct iwch_dev *rhp = to_iwch_dev(context->device); struct iwch_ucontext *ucontext = to_iwch_ucontext(context); @@ -72,24 +72,20 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context) list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); - kfree(ucontext); - return 0; } -static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, + struct ib_udata *udata) { - struct iwch_ucontext *context; + struct ib_device *ibdev = ucontext->device; + struct iwch_ucontext *context = to_iwch_ucontext(ucontext); struct iwch_dev *rhp = to_iwch_dev(ibdev); pr_debug("%s ibdev %p\n", __func__, ibdev); - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); cxio_init_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); - return &context->ibucontext; + return 0; } static int iwch_destroy_cq(struct ib_cq *ib_cq) @@ -1342,6 +1338,7 @@ static const struct ib_device_ops iwch_dev_ops = { .req_notify_cq = iwch_arm_cq, .resize_cq = iwch_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), }; int iwch_register_device(struct iwch_dev *dev) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 81fcffb597ab..507c54572cc9 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -58,7 +58,7 @@ static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +static void c4iw_dealloc_ucontext(struct ib_ucontext *context) { struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); struct c4iw_dev *rhp; @@ -70,26 +70,19 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context) list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); - kfree(ucontext); - return 0; } -static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext, + struct ib_udata *udata) { - struct c4iw_ucontext *context; + struct ib_device *ibdev = ucontext->device; + struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext); struct c4iw_dev *rhp = to_c4iw_dev(ibdev); struct c4iw_alloc_ucontext_resp uresp; int ret = 0; struct c4iw_mm_entry *mm = NULL; pr_debug("ibdev %p\n", ibdev); - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = -ENOMEM; - goto err; - } - c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); @@ -101,7 +94,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, mm = kmalloc(sizeof(*mm), GFP_KERNEL); if (!mm) { ret = -ENOMEM; - goto err_free; + goto err; } uresp.status_page_size = PAGE_SIZE; @@ -121,13 +114,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, mm->len = PAGE_SIZE; insert_mmap(context, mm); } - return &context->ibucontext; + return 0; err_mm: kfree(mm); -err_free: - kfree(context); err: - return ERR_PTR(ret); + return ret; } static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) @@ -555,6 +546,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; void c4iw_register_device(struct work_struct *work) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 29fb4fbba5ba..c929125da84b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -335,23 +335,19 @@ static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, return 0; } -static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, - struct ib_udata *udata) +static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { int ret = 0; - struct hns_roce_ucontext *context; + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); struct hns_roce_ib_alloc_ucontext_resp resp = {}; - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); if (!hr_dev->active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; resp.qp_tab_size = hr_dev->caps.num_qps; - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) goto error_fail_uar_alloc; @@ -365,25 +361,20 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, if (ret) goto error_fail_copy_to_udata; - return &context->ibucontext; + return 0; error_fail_copy_to_udata: hns_roce_uar_free(hr_dev, &context->uar); error_fail_uar_alloc: - kfree(context); - - return ERR_PTR(ret); + return ret; } -static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar); - kfree(context); - - return 0; } static int hns_roce_mmap(struct ib_ucontext *context, @@ -478,6 +469,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext), }; static const struct ib_device_ops hns_roce_dev_mr_ops = { diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 76b4d1218696..a8352e3ca23d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -121,78 +121,55 @@ static int i40iw_query_port(struct ib_device *ibdev, /** * i40iw_alloc_ucontext - Allocate the user context data structure - * @ibdev: device pointer from stack + * @uctx: Uverbs context pointer from stack * @udata: user data * * This keeps track of all objects associated with a particular * user-mode client. */ -static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int i40iw_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct i40iw_device *iwdev = to_iwdev(ibdev); struct i40iw_alloc_ucontext_req req; - struct i40iw_alloc_ucontext_resp uresp; - struct i40iw_ucontext *ucontext; + struct i40iw_alloc_ucontext_resp uresp = {}; + struct i40iw_ucontext *ucontext = to_ucontext(uctx); if (ib_copy_from_udata(&req, udata, sizeof(req))) - return ERR_PTR(-EINVAL); + return -EINVAL; if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) { i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver); - return ERR_PTR(-EINVAL); + return -EINVAL; } - memset(&uresp, 0, sizeof(uresp)); uresp.max_qps = iwdev->max_qp; uresp.max_pds = iwdev->max_pd; uresp.wq_size = iwdev->max_qp_wr * 2; uresp.kernel_ver = req.userspace_ver; - ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL); - if (!ucontext) - return ERR_PTR(-ENOMEM); - ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(ucontext); - return ERR_PTR(-EFAULT); - } + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) + return -EFAULT; INIT_LIST_HEAD(&ucontext->cq_reg_mem_list); spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); - return &ucontext->ibucontext; + return 0; } /** * i40iw_dealloc_ucontext - deallocate the user context data structure * @context: user context created during alloc */ -static int i40iw_dealloc_ucontext(struct ib_ucontext *context) +static void i40iw_dealloc_ucontext(struct ib_ucontext *context) { - struct i40iw_ucontext *ucontext = to_ucontext(context); - unsigned long flags; - - spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->cq_reg_mem_list)) { - spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); - return -EBUSY; - } - spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->qp_reg_mem_list)) { - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - return -EBUSY; - } - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - kfree(ucontext); - return 0; + return; } /** @@ -2740,6 +2717,7 @@ static const struct ib_device_ops i40iw_dev_ops = { .reg_user_mr = i40iw_reg_user_mr, .req_notify_cq = i40iw_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext), }; /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c0f6aea7ed7c..733f7bbd5901 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1076,17 +1076,18 @@ out: return err; } -static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_ucontext *context; + struct mlx4_ib_ucontext *context = to_mucontext(uctx); struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; @@ -1100,15 +1101,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, resp.cqe_size = dev->dev->caps.cqe_size; } - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); - if (err) { - kfree(context); - return ERR_PTR(err); - } + if (err) + return err; INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1123,21 +1118,17 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); - kfree(context); - return ERR_PTR(-EFAULT); + return -EFAULT; } - return &context->ibucontext; + return err; } -static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); - kfree(context); - - return 0; } static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) @@ -2570,6 +2561,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext), }; static const struct ib_device_ops mlx4_ib_dev_wq_ops = { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 614b2acbc621..994c19d01211 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1745,14 +1745,15 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } -static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; @@ -1762,29 +1763,29 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, bool lib_uar_4k; if (!dev->ib_active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; else if (udata->inlen >= min_req_v2) ver = 2; else - return ERR_PTR(-EINVAL); + return -EINVAL; err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; req.total_num_bfregs = ALIGN(req.total_num_bfregs, MLX5_NON_FP_BFREGS_PER_UAR); if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) @@ -1817,10 +1818,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ } - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; bfregi = &context->bfregi; @@ -1955,7 +1952,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 1, &dev->roce[port].tx_port_affinity)); } - return &context->ibucontext; + return 0; out_mdev: mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -1973,12 +1970,10 @@ out_count: kfree(bfregi->count); out_ctx: - kfree(context); - - return ERR_PTR(err); + return err; } -static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); @@ -1998,9 +1993,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); - kfree(context); - - return 0; } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -5984,6 +5976,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 80c3af217d96..d063d7a37762 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -301,17 +301,16 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, return err; } -static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mthca_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { - struct mthca_alloc_ucontext_resp uresp; - struct mthca_ucontext *context; + struct ib_device *ibdev = uctx->device; + struct mthca_alloc_ucontext_resp uresp = {}; + struct mthca_ucontext *context = to_mucontext(uctx); int err; if (!(to_mdev(ibdev)->active)) - return ERR_PTR(-EAGAIN); - - memset(&uresp, 0, sizeof uresp); + return -EAGAIN; uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; if (mthca_is_memfree(to_mdev(ibdev))) @@ -319,44 +318,33 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, else uresp.uarc_size = 0; - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); - if (err) { - kfree(context); - return ERR_PTR(err); - } + if (err) + return err; context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); if (IS_ERR(context->db_tab)) { err = PTR_ERR(context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(err); + return err; } - if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(-EFAULT); + return -EFAULT; } context->reg_mr_warned = 0; - return &context->ibucontext; + return 0; } -static int mthca_dealloc_ucontext(struct ib_ucontext *context) +static void mthca_dealloc_ucontext(struct ib_ucontext *context) { mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, to_mucontext(context)->db_tab); mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); - kfree(to_mucontext(context)); - - return 0; } static int mthca_mmap_uar(struct ib_ucontext *context, @@ -1213,6 +1201,7 @@ static const struct ib_device_ops mthca_dev_ops = { .reg_user_mr = mthca_reg_user_mr, .resize_cq = mthca_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext), }; static const struct ib_device_ops mthca_dev_arbel_srq_ops = { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index b23956aa45b8..828e4af3f951 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -529,27 +529,27 @@ static int nes_query_gid(struct ib_device *ibdev, u8 port, * nes_alloc_ucontext - Allocate the user context data structure. This keeps track * of all objects associated with a particular user-mode client. */ -static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int nes_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_alloc_ucontext_req req; struct nes_alloc_ucontext_resp uresp = {}; - struct nes_ucontext *nes_ucontext; + struct nes_ucontext *nes_ucontext = to_nesucontext(uctx); struct nes_ib_device *nesibdev = nesvnic->nesibdev; if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) { printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (req.userspace_ver != NES_ABI_USERSPACE_VER) { printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n", req.userspace_ver, NES_ABI_USERSPACE_VER); - return ERR_PTR(-EINVAL); + return -EINVAL; } @@ -559,10 +559,6 @@ static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, uresp.virtwq = nesadapter->virtwq; uresp.kernel_ver = NES_ABI_KERNEL_VER; - nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL); - if (!nes_ucontext) - return ERR_PTR(-ENOMEM); - nes_ucontext->nesdev = nesdev; nes_ucontext->mmap_wq_offset = uresp.max_pds; nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset + @@ -570,29 +566,22 @@ static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, PAGE_SIZE; - if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { - kfree(nes_ucontext); - return ERR_PTR(-EFAULT); - } + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) + return -EFAULT; INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list); INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list); - return &nes_ucontext->ibucontext; + return 0; } - /** * nes_dealloc_ucontext */ -static int nes_dealloc_ucontext(struct ib_ucontext *context) +static void nes_dealloc_ucontext(struct ib_ucontext *context) { - struct nes_ucontext *nes_ucontext = to_nesucontext(context); - - kfree(nes_ucontext); - return 0; + return; } - /** * nes_mmap */ @@ -3599,6 +3588,7 @@ static const struct ib_device_ops nes_dev_ops = { .reg_user_mr = nes_reg_user_mr, .req_notify_cq = nes_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, nes_ucontext, ibucontext), }; /** diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 0de83c92691f..b9e10d55a58e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -180,6 +180,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .req_notify_cq = ocrdma_arm_cq, .resize_cq = ocrdma_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext), }; static const struct ib_device_ops ocrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index ed5da67b693d..b4e1777c2c97 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -440,7 +440,7 @@ err: return status; } -static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) +static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) { struct ocrdma_pd *pd = uctx->cntxt_pd; struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); @@ -451,8 +451,7 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) } kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; - (void)_ocrdma_dealloc_pd(dev, pd); - return 0; + _ocrdma_dealloc_pd(dev, pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) @@ -476,33 +475,28 @@ static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx) mutex_unlock(&uctx->mm_list_lock); } -struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; int status; - struct ocrdma_ucontext *ctx; - struct ocrdma_alloc_ucontext_resp resp; + struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx); + struct ocrdma_alloc_ucontext_resp resp = {}; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct pci_dev *pdev = dev->nic_info.pdev; u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE); if (!udata) - return ERR_PTR(-EFAULT); - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); + return -EFAULT; INIT_LIST_HEAD(&ctx->mm_head); mutex_init(&ctx->mm_list_lock); ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len, &ctx->ah_tbl.pa, GFP_KERNEL); - if (!ctx->ah_tbl.va) { - kfree(ctx); - return ERR_PTR(-ENOMEM); - } + if (!ctx->ah_tbl.va) + return -ENOMEM; + ctx->ah_tbl.len = map_len; - memset(&resp, 0, sizeof(resp)); resp.ah_tbl_len = ctx->ah_tbl.len; resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va); @@ -524,7 +518,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, status = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (status) goto cpy_err; - return &ctx->ibucontext; + return 0; cpy_err: ocrdma_dealloc_ucontext_pd(ctx); @@ -533,19 +527,17 @@ pd_err: map_err: dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, ctx->ah_tbl.pa); - kfree(ctx); - return ERR_PTR(status); + return status; } -int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) +void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { - int status; struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); struct pci_dev *pdev = dev->nic_info.pdev; - status = ocrdma_dealloc_ucontext_pd(uctx); + ocrdma_dealloc_ucontext_pd(uctx); ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, @@ -555,8 +547,6 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) list_del(&mm->entry); kfree(mm); } - kfree(uctx); - return status; } int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 1fd66721c930..4c04ab40798e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -64,9 +64,8 @@ void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num); int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); -struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *, - struct ib_udata *); -int ocrdma_dealloc_ucontext(struct ib_ucontext *); +int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); +void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 44ce4989dcef..996d9ecd93e0 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -240,6 +240,7 @@ static const struct ib_device_ops qedr_dev_ops = { .req_notify_cq = qedr_arm_cq, .resize_cq = qedr_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; static int qedr_register_device(struct qedr_dev *dev) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d51bc3ede9d1..59ad4202422c 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -316,28 +316,24 @@ static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, return found; } -struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; int rc; - struct qedr_ucontext *ctx; - struct qedr_alloc_ucontext_resp uresp; + struct qedr_ucontext *ctx = get_qedr_ucontext(uctx); + struct qedr_alloc_ucontext_resp uresp = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; if (!udata) - return ERR_PTR(-EFAULT); - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); + return -EFAULT; rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { DP_ERR(dev, "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n", rc); - goto err; + return rc; } ctx->dpi = oparams.dpi; @@ -347,8 +343,6 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&ctx->mm_head); mutex_init(&ctx->mm_list_lock); - memset(&uresp, 0, sizeof(uresp)); - uresp.dpm_enabled = dev->user_dpm_enabled; uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; @@ -364,28 +358,23 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) - goto err; + return rc; ctx->dev = dev; rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); if (rc) - goto err; + return rc; DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", &ctx->ibucontext); - return &ctx->ibucontext; - -err: - kfree(ctx); - return ERR_PTR(rc); + return 0; } -int qedr_dealloc_ucontext(struct ib_ucontext *ibctx) +void qedr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); struct qedr_mm *mm, *tmp; - int status = 0; DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", uctx); @@ -398,9 +387,6 @@ int qedr_dealloc_ucontext(struct ib_ucontext *ibctx) list_del(&mm->entry); kfree(mm); } - - kfree(uctx); - return status; } int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 97a6ff3f9afb..f0c05f4771ac 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -43,8 +43,8 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); -struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *); -int qedr_dealloc_ucontext(struct ib_ucontext *); +int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); +void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index be6468021a9a..d88d9f8a7f9a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -350,6 +350,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, usnic_ib_ucontext, ibucontext), }; /* Start of PF discovery section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 7549ae23027e..bd4521b2cc5f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -653,37 +653,31 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) return 0; } -struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - struct usnic_ib_ucontext *context; + struct ib_device *ibdev = uctx->device; + struct usnic_ib_ucontext *context = to_ucontext(uctx); struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); usnic_dbg("\n"); - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&context->qp_grp_list); mutex_lock(&us_ibdev->usdev_lock); list_add_tail(&context->link, &us_ibdev->ctx_list); mutex_unlock(&us_ibdev->usdev_lock); - return &context->ibucontext; + return 0; } -int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct usnic_ib_ucontext *context = to_uucontext(ibcontext); struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device); usnic_dbg("\n"); mutex_lock(&us_ibdev->usdev_lock); - BUG_ON(!list_empty(&context->qp_grp_list)); + WARN_ON_ONCE(!list_empty(&context->qp_grp_list)); list_del(&context->link); mutex_unlock(&us_ibdev->usdev_lock); - kfree(context); - return 0; } int usnic_ib_mmap(struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2a87650949f6..c40e89b6246f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -68,9 +68,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int usnic_ib_dereg_mr(struct ib_mr *ibmr); -struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata); -int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); +int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); +void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); #endif /* !USNIC_IB_VERBS_H */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 47e653d2495c..6d8b3e0de57a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -196,6 +196,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), }; static const struct ib_device_ops pvrdma_dev_srq_ops = { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index f44220f72e05..42fe821f8d58 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -306,41 +306,32 @@ out: /** * pvrdma_alloc_ucontext - allocate ucontext - * @ibdev: the IB device + * @uctx: the uverbs countext * @udata: user data * - * @return: the ib_ucontext pointer on success, otherwise errno. + * @return: zero on success, otherwise errno. */ -struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct pvrdma_dev *vdev = to_vdev(ibdev); - struct pvrdma_ucontext *context; - union pvrdma_cmd_req req; - union pvrdma_cmd_resp rsp; + struct pvrdma_ucontext *context = to_vucontext(uctx); + union pvrdma_cmd_req req = {}; + union pvrdma_cmd_resp rsp = {}; struct pvrdma_cmd_create_uc *cmd = &req.create_uc; struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp; - struct pvrdma_alloc_ucontext_resp uresp = {0}; + struct pvrdma_alloc_ucontext_resp uresp = {}; int ret; - void *ptr; if (!vdev->ib_active) - return ERR_PTR(-EAGAIN); - - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + return -EAGAIN; context->dev = vdev; ret = pvrdma_uar_alloc(vdev, &context->uar); - if (ret) { - kfree(context); - return ERR_PTR(-ENOMEM); - } + if (ret) + return -ENOMEM; /* get ctx_handle from host */ - memset(cmd, 0, sizeof(*cmd)); - if (vdev->dsr_version < PVRDMA_PPN64_VERSION) cmd->pfn = context->uar.pfn; else @@ -351,7 +342,6 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, if (ret < 0) { dev_warn(&vdev->pdev->dev, "could not create ucontext, error: %d\n", ret); - ptr = ERR_PTR(ret); goto err; } @@ -362,33 +352,28 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) { pvrdma_uar_free(vdev, &context->uar); - context->ibucontext.device = ibdev; pvrdma_dealloc_ucontext(&context->ibucontext); - return ERR_PTR(-EFAULT); + return -EFAULT; } - return &context->ibucontext; + return 0; err: pvrdma_uar_free(vdev, &context->uar); - kfree(context); - return ptr; + return ret; } /** * pvrdma_dealloc_ucontext - deallocate ucontext * @ibcontext: the ucontext - * - * @return: 0 on success, otherwise errno. */ -int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext) +void pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct pvrdma_ucontext *context = to_vucontext(ibcontext); - union pvrdma_cmd_req req; + union pvrdma_cmd_req req = {}; struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc; int ret; - memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC; cmd->ctx_handle = context->ctx_handle; @@ -399,9 +384,6 @@ int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext) /* Free the UAR even if the device command failed */ pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar); - kfree(context); - - return ret; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index ed91baad1ffa..607aa131d67c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -396,9 +396,8 @@ int pvrdma_modify_device(struct ib_device *ibdev, int mask, int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props); int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata); -int pvrdma_dealloc_ucontext(struct ib_ucontext *context); +int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); +void pvrdma_dealloc_ucontext(struct ib_ucontext *context); int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); void pvrdma_dealloc_pd(struct ib_pd *ibpd); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index ee9c82cb3b6b..42c9d35f832d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -292,28 +292,21 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext /** * rvt_alloc_ucontext - Allocate a user context - * @ibdev: Verbs IB dev + * @uctx: Verbs context * @udata: User data allocated */ -static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - struct rvt_ucontext *context; - - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - return &context->ibucontext; + return 0; } /** - *rvt_dealloc_ucontext - Free a user context - *@context - Free this + * rvt_dealloc_ucontext - Free a user context + * @context - Free this */ -static int rvt_dealloc_ucontext(struct ib_ucontext *context) +static void rvt_dealloc_ucontext(struct ib_ucontext *context) { - kfree(to_iucontext(context)); - return 0; + return; } static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, @@ -433,6 +426,7 @@ static const struct ib_device_ops rvt_dev_ops = { .resize_cq = rvt_resize_cq, .unmap_fmr = rvt_unmap_fmr, INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), }; static noinline int check_support(struct rvt_dev_info *rdi, int verb) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index c9b8b8c6bfb5..120fa9005954 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -42,6 +42,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_PD] = { .name = "rxe-pd", diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bd6a379b79d3..6ecf28570ff0 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -142,22 +142,19 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, return rxe_link_layer(rxe, port_num); } -static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev, - struct ib_udata *udata) +static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(dev); - struct rxe_ucontext *uc; + struct rxe_dev *rxe = to_rdev(uctx->device); + struct rxe_ucontext *uc = to_ruc(uctx); - uc = rxe_alloc(&rxe->uc_pool); - return uc ? &uc->ibuc : ERR_PTR(-ENOMEM); + return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); } -static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc) +static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) { struct rxe_ucontext *uc = to_ruc(ibuc); rxe_drop_ref(uc); - return 0; } static int rxe_port_immutable(struct ib_device *dev, u8 port_num, @@ -1180,6 +1177,7 @@ static const struct ib_device_ops rxe_dev_ops = { .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), }; int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index d02eb75ef282..157e51aeb1e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -61,8 +61,8 @@ static inline int psn_compare(u32 psn_a, u32 psn_b) } struct rxe_ucontext { + struct ib_ucontext ibuc; struct rxe_pool_entry pelem; - struct ib_ucontext ibuc; }; struct rxe_pd { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 225cb76d469f..9b9e17bcc201 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2389,9 +2389,9 @@ struct ib_device_ops { int (*del_gid)(const struct ib_gid_attr *attr, void **context); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); - struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, - struct ib_udata *udata); - int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*alloc_ucontext)(struct ib_ucontext *context, + struct ib_udata *udata); + void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, @@ -2551,6 +2551,7 @@ struct ib_device_ops { void (*dealloc_driver)(struct ib_device *dev); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; struct rdma_restrack_root; -- cgit v1.2.3-55-g7522