summary refs log tree commit diff stats
path: root/net/sunrpc/xprtrdma/verbs.c
diff options
context:
space:
mode:
author Chuck Lever 2017-04-11 19:23:10 +0200
committer Anna Schumaker 2017-04-25 22:12:24 +0200
commit bebd031866caa404c522e91bb6fd0c69be04c707 (patch)
tree 5046d400f405f383e1823d140e18c3f878002cec /net/sunrpc/xprtrdma/verbs.c
parent xprtrdma: Use same device when mapping or syncing DMA buffers (diff)
download kernel-qcow2-linux-bebd031866caa404c522e91bb6fd0c69be04c707.tar.gz
kernel-qcow2-linux-bebd031866caa404c522e91bb6fd0c69be04c707.tar.xz
kernel-qcow2-linux-bebd031866caa404c522e91bb6fd0c69be04c707.zip
xprtrdma: Support unplugging an HCA from under an NFS mount
The device driver for the underlying physical device associated with an RPC-over-RDMA transport can be removed while RPC-over-RDMA transports are still in use (i.e., while NFS filesystems are still mounted and active). The IB core performs a connection event upcall to request that consumers free all RDMA resources associated with a transport.

There may be pending RPCs when this occurs. Care must be taken to release associated resources without leaving references that can trigger a subsequent crash if a signal or soft timeout occurs. We rely on the caller of the transport's ->close method to ensure that the previous RPC task has invoked xprt_release but the transport remains write-locked.

A DEVICE_REMOVAL upcall forces a disconnect then sleeps. When ->close is invoked, it destroys the transport's H/W resources, then wakes the upcall, which completes and allows the core driver unload to continue.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=266
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c 74
1 files changed, 70 insertions, 4 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c8813fb2163f..938fd9e6f308 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -69,6 +69,8 @@
/*
* internal functions
*/
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
static struct workqueue_struct *rpcrdma_receive_wq;
@@ -262,6 +264,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
__func__, ep);
complete(&ia->ri_done);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ pr_info("rpcrdma: removing device for %pIS:%u\n",
+ sap, rpc_get_port(sap));
+#endif
+ set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+ ep->rep_connected = -ENODEV;
+ xprt_force_disconnect(&xprt->rx_xprt);
+ wait_for_completion(&ia->ri_remove_done);
+
+ ia->ri_id = NULL;
+ ia->ri_pd = NULL;
+ ia->ri_device = NULL;
+ /* Return 1 to ensure the core destroys the id. */
+ return 1;
case RDMA_CM_EVENT_ESTABLISHED:
connstate = 1;
ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +308,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
- goto connected;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- connstate = -ENODEV;
connected:
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
@@ -346,6 +360,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
int rc;
init_completion(&ia->ri_done);
+ init_completion(&ia->ri_remove_done);
id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
IB_QPT_RC);
@@ -469,6 +484,56 @@ out_err:
}
/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ *
+ * @ia must be the rx_ia member of a struct rpcrdma_xprt; the
+ * enclosing transport, its endpoint and its buffer pool are
+ * located from it with container_of().
+ *
+ * In order, this function:
+ * - synchronously cancels the buffer pool's rb_refresh_worker,
+ * - drains and destroys the QP, if the rdma_cm id still has one,
+ * - frees the endpoint's receive and send CQs,
+ * - DMA-unmaps the regbufs of every rep on rb_recv_bufs and of
+ * every req on rb_allreqs,
+ * - destroys the MRs via rpcrdma_destroy_mrs(),
+ * - completes @ia->ri_remove_done.
+ *
+ * The rep and req lists are only walked here; no entry is
+ * unlinked by this function.
+ *
+ * The RDMA_CM_EVENT_DEVICE_REMOVAL arm of rpcrdma_conn_upcall()
+ * sleeps on ri_remove_done after forcing a disconnect, so that
+ * upcall cannot finish until this function has signalled it.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+ rx_ia);
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_req *req;
+ struct rpcrdma_rep *rep;
+
+ cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+ /* This is similar to rpcrdma_ep_destroy, but:
+ * - Don't cancel the connect worker.
+ * - Don't call rpcrdma_ep_disconnect, which waits
+ * for another conn upcall, which will deadlock.
+ * - rdma_disconnect is unneeded, the underlying
+ * connection is already gone.
+ *
+ * The QP is drained before it is destroyed, and the id's
+ * qp pointer is cleared afterwards so that the id no longer
+ * refers to the destroyed QP.
+ */
+ if (ia->ri_id->qp) {
+ ib_drain_qp(ia->ri_id->qp);
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
+ }
+ ib_free_cq(ep->rep_attr.recv_cq);
+ ib_free_cq(ep->rep_attr.send_cq);
+
+ /* The ULP is responsible for ensuring all DMA
+ * mappings and MRs are gone.
+ *
+ * Each rep contributes one regbuf (rr_rdmabuf); each req
+ * contributes three (rl_rdmabuf, rl_sendbuf, rl_recvbuf).
+ */
+ list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+ rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+ list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+ rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+ }
+ rpcrdma_destroy_mrs(buf);
+
+ /* Allow waiters to continue: the DEVICE_REMOVAL upcall in
+ * rpcrdma_conn_upcall() blocks on this completion, so it
+ * must be signalled only after the teardown above is done.
+ */
+ complete(&ia->ri_remove_done);
+}
+
+/**
* rpcrdma_ia_close - Clean up/close an IA.
* @ia: interface adapter to close
*
@@ -1080,7 +1145,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
out_nomws:
dprintk("RPC: %s: no MWs available\n", __func__);
- schedule_delayed_work(&buf->rb_refresh_worker, 0);
+ if (r_xprt->rx_ep.rep_connected != -ENODEV)
+ schedule_delayed_work(&buf->rb_refresh_worker, 0);
/* Allow the reply handler and refresh worker to run */
cond_resched();