From cb4876e8ce1c6d78306c206df1970748ebb89025 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Nov 2017 23:29:05 +0200 Subject: nvmet-rdma: removed queue cleanup from module exit We already do that when we are notified in device removal which is triggered when unregistering as an ib client. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 0e4c15754c58..454a5dce81f8 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1503,25 +1503,9 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { - struct nvmet_rdma_queue *queue; - nvmet_unregister_transport(&nvmet_rdma_ops); - - flush_scheduled_work(); - - mutex_lock(&nvmet_rdma_queue_mutex); - while ((queue = list_first_entry_or_null(&nvmet_rdma_queue_list, - struct nvmet_rdma_queue, queue_list))) { - list_del_init(&queue->queue_list); - - mutex_unlock(&nvmet_rdma_queue_mutex); - __nvmet_rdma_queue_disconnect(queue); - mutex_lock(&nvmet_rdma_queue_mutex); - } - mutex_unlock(&nvmet_rdma_queue_mutex); - - flush_scheduled_work(); ib_unregister_client(&nvmet_rdma_ib_client); + WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); ida_destroy(&nvmet_rdma_queue_ida); } -- cgit v1.2.3-55-g7522 From 424125a09db7a207ab53876db50a6198ca88518f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 Dec 2017 10:47:10 +0200 Subject: nvmet-rdma: lowering log level for chatty debug messages It is a bit chatty to report on every deleted queue, so keep it for debug purposes only. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 454a5dce81f8..978e169c11bf 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -921,7 +921,7 @@ static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) { - pr_info("freeing queue %d\n", queue->idx); + pr_debug("freeing queue %d\n", queue->idx); nvmet_sq_destroy(&queue->nvme_sq); -- cgit v1.2.3-55-g7522 From 6a1c57acab85e2e7a18827b43710b4e16c11148d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 Dec 2017 10:47:09 +0200 Subject: nvmet: lower log level for each queue creation It is a bit chatty to report on each queue, log it only for debug purposes. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index db3bf6b8bf9e..19e9e42ae943 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -225,7 +225,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out_ctrl_put; } - pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); + pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: kfree(d); -- cgit v1.2.3-55-g7522 From 278e096063f1914fccfc77a617be9fc8dbb31b0e Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:30 -0800 Subject: nvme_fcloop: fix abort race condition A test case revealed a race condition of an i/o completing on a thread parallel to the delete_association generating the aborts for the outstanding ios on the controller. The i/o completion was freeing the target fcloop context, thus the abort task referenced the just-freed memory. Correct by clearing the target/initiator cross pointers in the io completion and abort tasks before calling the callbacks. On aborts that detect already finished io's, ensure the complete context is called. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 7b75d9de55ab..3eb2a0733f46 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -370,6 +370,7 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) spin_lock(&tfcp_req->reqlock); fcpreq = tfcp_req->fcpreq; + tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); if (tport->remoteport && fcpreq) { @@ -611,11 +612,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, if (!tfcp_req) /* abort has already been called */ - return; - - if (rport->targetport) - nvmet_fc_rcv_fcp_abort(rport->targetport, - &tfcp_req->tgt_fcp_req); + goto finish; /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); @@ -623,6 +620,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + +finish: /* post the aborted io completion */ fcpreq->status = -ECANCELED; schedule_work(&inireq->iniwork); -- cgit v1.2.3-55-g7522 From 6fda20283e55b9d288cd56822ce39fc8e64f2208 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:31 -0800 Subject: nvme_fcloop: disassocate local port structs The current fcloop driver gets its lport structure from the private area co-allocated with the fc_localport. All is fine except the teardown path, which wants to wait on the completion, which is marked complete by the delete_localport callback performed after unregister_localport. The issue is, the nvme_fc transport frees the localport structure immediately after delete_localport is called, meaning the original routine is trying to wait on a complete that was just freed. Change such that a lport struct is allocated coincident with the addition and registration of a localport. The private area of the localport now contains just a backpointer to the real lport struct. Now, the completion can be waited for, and after completing, the new structure can be kfree'd. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 3eb2a0733f46..c0080f6ab2f5 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -204,6 +204,10 @@ struct fcloop_lport { struct completion unreg_done; }; +struct fcloop_lport_priv { + struct fcloop_lport *lport; +}; + struct fcloop_rport { struct nvme_fc_remote_port *remoteport; struct nvmet_fc_target_port *targetport; @@ -659,7 +663,8 @@ fcloop_nport_get(struct fcloop_nport *nport) static void fcloop_localport_delete(struct nvme_fc_local_port *localport) { - struct fcloop_lport *lport = localport->private; + struct fcloop_lport_priv *lport_priv = localport->private; + struct fcloop_lport *lport = lport_priv->lport; /* release any threads waiting for the unreg to complete */ complete(&lport->unreg_done); @@ -699,7 +704,7 @@ static struct nvme_fc_port_template fctemplate = { .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* sizes of additional private data for data structures */ - .local_priv_sz = sizeof(struct fcloop_lport), + .local_priv_sz = sizeof(struct fcloop_lport_priv), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), @@ -730,11 +735,17 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, struct fcloop_ctrl_options *opts; struct nvme_fc_local_port *localport; struct fcloop_lport *lport; - int ret; + struct fcloop_lport_priv *lport_priv; + unsigned long flags; + int ret = -ENOMEM; + + lport = kzalloc(sizeof(*lport), GFP_KERNEL); + if (!lport) + return -ENOMEM; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) - return -ENOMEM; + goto out_free_lport; ret = fcloop_parse_options(opts, buf); if (ret) @@ -754,23 +765,25 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, ret = nvme_fc_register_localport(&pinfo, &fctemplate, NULL, &localport); if (!ret) { - unsigned long flags; - /* success */ - lport = localport->private; + lport_priv = localport->private; + lport_priv->lport = lport; + lport->localport = localport; INIT_LIST_HEAD(&lport->lport_list); spin_lock_irqsave(&fcloop_lock, flags); list_add_tail(&lport->lport_list, &fcloop_lports); spin_unlock_irqrestore(&fcloop_lock, flags); - - /* mark all of the input buffer consumed */ - ret = count; } out_free_opts: kfree(opts); +out_free_lport: + /* free only if we're going to fail */ + if (ret) + kfree(lport); + return ret ? ret : count; } @@ -792,6 +805,8 @@ __wait_localport_unreg(struct fcloop_lport *lport) wait_for_completion(&lport->unreg_done); + kfree(lport); + return ret; } -- cgit v1.2.3-55-g7522 From 24431d60d3fbfd4c8c05e1828e5d9b35db4fd81c Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:32 -0800 Subject: nvme_fcloop: rework to remove xxx_IN_ISR feature flags The existing fcloop driver expects the target side upcalls to the transport to context switch, thus the calls into the nvmet layer are not done in the calling context of the host/initiator down calls. The xxx_IN_ISR feature flags are used to select this logic. The xxx_IN_ISR feature flags should go away in the nvmet_fc transport as no other lldd utilizes them. Both Broadcom and Cavium lldds have their own non-ISR deferred handlers thus the nvmet calls can be made directly. This patch converts the paths that make the target upcalls (command receive, abort receive) such that they schedule a work item rather than expecting the transport to schedule the work item. The patch also cleans up the following: - The completion path from target to host scheduled a host work element called "work". Rename it "tio_done_work" for code clarity. - The abort io path called a iniwork item to call the host side io done. This is no longer needed as the abort routine can make the same call. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 98 ++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c0080f6ab2f5..c5015199c031 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -249,14 +249,15 @@ struct fcloop_fcpreq { u16 status; bool active; bool aborted; - struct work_struct work; + struct work_struct fcp_rcv_work; + struct work_struct abort_rcv_work; + struct work_struct tio_done_work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; struct fcloop_ini_fcpreq { struct nvmefc_fcp_req *fcpreq; struct fcloop_fcpreq *tfcp_req; - struct work_struct iniwork; }; static inline struct fcloop_lsreq * @@ -347,17 +348,58 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, return 0; } -/* - * FCP IO operation done by initiator abort. - * call back up initiator "done" flows. - */ static void -fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +fcloop_fcp_recv_work(struct work_struct *work) +{ + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, fcp_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct fcloop_ini_fcpreq *inireq = NULL; + int ret = 0; + + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); + if (ret) { + inireq = fcpreq->private; + inireq->tfcp_req = NULL; + + fcpreq->status = tfcp_req->status; + fcpreq->done(fcpreq); + } +} + +static void +fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, + struct fcloop_fcpreq *tfcp_req, int status) +{ + struct fcloop_ini_fcpreq *inireq = NULL; + + if (fcpreq) { + inireq = fcpreq->private; + inireq->tfcp_req = NULL; + + fcpreq->status = status; + fcpreq->done(fcpreq); + } +} + +static void +fcloop_fcp_abort_recv_work(struct work_struct *work) { - struct fcloop_ini_fcpreq *inireq = - container_of(work, struct fcloop_ini_fcpreq, iniwork); + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, abort_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + + if (tfcp_req->tport->targetport) + nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req); + + spin_lock(&tfcp_req->reqlock); + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); - inireq->fcpreq->done(inireq->fcpreq); + fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); } /* @@ -368,8 +410,7 @@ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = - container_of(work, struct fcloop_fcpreq, work); - struct fcloop_tport *tport = tfcp_req->tport; + container_of(work, struct fcloop_fcpreq, tio_done_work); struct nvmefc_fcp_req *fcpreq; spin_lock(&tfcp_req->reqlock); @@ -377,10 +418,7 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); - if (tport->remoteport && fcpreq) { - fcpreq->status = tfcp_req->status; - fcpreq->done(fcpreq); - } + fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); kfree(tfcp_req); } @@ -395,7 +433,6 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; struct fcloop_ini_fcpreq *inireq = fcpreq->private; struct fcloop_fcpreq *tfcp_req; - int ret = 0; if (!rport->targetport) return -ECONNREFUSED; @@ -406,16 +443,16 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, inireq->fcpreq = fcpreq; inireq->tfcp_req = tfcp_req; - INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; spin_lock_init(&tfcp_req->reqlock); - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); + INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); + INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); - ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); + schedule_work(&tfcp_req->fcp_rcv_work); - return ret; + return 0; } static void @@ -594,7 +631,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - schedule_work(&tfcp_req->work); + schedule_work(&tfcp_req->tio_done_work); } static void @@ -610,13 +647,12 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_rport *rport = remoteport->private; struct fcloop_ini_fcpreq *inireq = fcpreq->private; struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; if (!tfcp_req) /* abort has already been called */ - goto finish; + return; /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); @@ -624,14 +660,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); - if (rport->targetport) - nvmet_fc_rcv_fcp_abort(rport->targetport, - &tfcp_req->tgt_fcp_req); - -finish: - /* post the aborted io completion */ - fcpreq->status = -ECANCELED; - schedule_work(&inireq->iniwork); + WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); } static void @@ -721,8 +750,7 @@ static struct nvmet_fc_target_template tgttemplate = { .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | - NVMET_FCTGTFEAT_OPDONE_IN_ISR, + .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; -- cgit v1.2.3-55-g7522 From b6f807738b5e3a24eda3ea6864abc18d10279e69 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:33 -0800 Subject: nvme_fcloop: refactor host/target io job access The split between what the host accesses on its flows vs what the target side accesses was flawed. Abort handling didn't properly clear initiator vs target structure cross-reference and locks weren't used for synchronization. Thus, there were issues of freeing structures too soon and access after free. A couple of these existed pre the IN_ISR mods, but when the target upcalls were converted to work items, thus adding delays between the 2 sides of accesses, the problems became pronounced. Resolve by: - tracking io state mainly in the tgt-side io structure. - make the tgt-side io structure released by reference not by code flow. - when changing initiator structures, use locks for synchronization - aborts are clearly tracked for which side saw the abort, and after seeing the abort, cross-references are cleared under lock. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 147 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 125 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c5015199c031..9f8a6726df91 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -242,13 +242,22 @@ struct fcloop_lsreq { int status; }; +enum { + INI_IO_START = 0, + INI_IO_ACTIVE = 1, + INI_IO_ABORTED = 2, + INI_IO_COMPLETED = 3, +}; + struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; spinlock_t reqlock; u16 status; + u32 inistate; bool active; bool aborted; + struct kref ref; struct work_struct fcp_rcv_work; struct work_struct abort_rcv_work; struct work_struct tio_done_work; @@ -258,6 +267,7 @@ struct fcloop_fcpreq { struct fcloop_ini_fcpreq { struct nvmefc_fcp_req *fcpreq; struct fcloop_fcpreq *tfcp_req; + spinlock_t inilock; }; static inline struct fcloop_lsreq * @@ -349,24 +359,24 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } static void -fcloop_fcp_recv_work(struct work_struct *work) +fcloop_tfcp_req_free(struct kref *ref) { struct fcloop_fcpreq *tfcp_req = - container_of(work, struct fcloop_fcpreq, fcp_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; - struct fcloop_ini_fcpreq *inireq = NULL; - int ret = 0; + container_of(ref, struct fcloop_fcpreq, ref); - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, - &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); - if (ret) { - inireq = fcpreq->private; - inireq->tfcp_req = NULL; + kfree(tfcp_req); +} - fcpreq->status = tfcp_req->status; - fcpreq->done(fcpreq); - } +static void +fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) +{ + kref_put(&tfcp_req->ref, fcloop_tfcp_req_free); +} + +static int +fcloop_tfcp_req_get(struct fcloop_fcpreq *tfcp_req) +{ + return kref_get_unless_zero(&tfcp_req->ref); } static void @@ -377,11 +387,52 @@ fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, if (fcpreq) { inireq = fcpreq->private; + spin_lock(&inireq->inilock); inireq->tfcp_req = NULL; + spin_unlock(&inireq->inilock); fcpreq->status = status; fcpreq->done(fcpreq); } + + /* release original io reference on tgt struct */ + fcloop_tfcp_req_put(tfcp_req); +} + +static void +fcloop_fcp_recv_work(struct work_struct *work) +{ + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, fcp_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + int ret = 0; + bool aborted = false; + + spin_lock(&tfcp_req->reqlock); + switch (tfcp_req->inistate) { + case INI_IO_START: + tfcp_req->inistate = INI_IO_ACTIVE; + break; + case INI_IO_ABORTED: + aborted = true; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(aborted)) + ret = -ECANCELED; + else + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); + if (ret) + fcloop_call_host_done(fcpreq, tfcp_req, ret); + + return; } static void @@ -389,7 +440,29 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, abort_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; + bool completed = false; + + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + switch (tfcp_req->inistate) { + case INI_IO_ABORTED: + break; + case INI_IO_COMPLETED: + completed = true; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(completed)) { + /* remove reference taken in original abort downcall */ + fcloop_tfcp_req_put(tfcp_req); + return; + } if (tfcp_req->tport->targetport) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, @@ -400,6 +473,7 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) spin_unlock(&tfcp_req->reqlock); fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); + /* call_host_done releases reference for abort downcall */ } /* @@ -415,12 +489,10 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) spin_lock(&tfcp_req->reqlock); fcpreq = tfcp_req->fcpreq; - tfcp_req->fcpreq = NULL; + tfcp_req->inistate = INI_IO_COMPLETED; spin_unlock(&tfcp_req->reqlock); fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); - - kfree(tfcp_req); } @@ -443,12 +515,16 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, inireq->fcpreq = fcpreq; inireq->tfcp_req = tfcp_req; + spin_lock_init(&inireq->inilock); + tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + tfcp_req->inistate = INI_IO_START; spin_lock_init(&tfcp_req->reqlock); INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); + kref_init(&tfcp_req->ref); schedule_work(&tfcp_req->fcp_rcv_work); @@ -648,7 +724,14 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct nvmefc_fcp_req *fcpreq) { struct fcloop_ini_fcpreq *inireq = fcpreq->private; - struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + struct fcloop_fcpreq *tfcp_req; + bool abortio = true; + + spin_lock(&inireq->inilock); + tfcp_req = inireq->tfcp_req; + if (tfcp_req) + fcloop_tfcp_req_get(tfcp_req); + spin_unlock(&inireq->inilock); if (!tfcp_req) /* abort has already been called */ @@ -656,11 +739,31 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); - inireq->tfcp_req = NULL; - tfcp_req->fcpreq = NULL; + switch (tfcp_req->inistate) { + case INI_IO_START: + case INI_IO_ACTIVE: + tfcp_req->inistate = INI_IO_ABORTED; + break; + case INI_IO_COMPLETED: + abortio = false; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } spin_unlock(&tfcp_req->reqlock); - WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + if (abortio) + /* leave the reference while the work item is scheduled */ + WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + else { + /* + * as the io has already had the done callback made, + * nothing more to do. So release the reference taken above + */ + fcloop_tfcp_req_put(tfcp_req); + } } static void -- cgit v1.2.3-55-g7522 From 9ce1f2e12e017607fe17a67cea79ebcf0184e5b3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 15:11:55 -0800 Subject: nvmet-fc: cleanup nvmet add_port/remove_port The current fc transport add_port routine validates that there is a matching port to the target port config. It then takes a reference on the targetport. The del_port removes the reference. Unfortunately, if the LLDD undergoes a hw reset or driver unload and wants to unreg the targetport, due to the reference, the targetport effectively can't be removed. It requires the admin to remove the port from the nvmet config first, which calls the del_port. Note: it appears nvmetcli clear skips over the del_port call (I'm not attempting to change that). There's no real reason to take the reference. With FC, there is nothing to enable or disable as the presence of the FC targetport implicitly means its enabled, and removal of the targtport means its disabled. Change add_port to simply validate and change remove_port to a noop. No references are taken on the targetport. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 840d1a39de33..9b39a6cb1935 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2490,14 +2490,8 @@ nvmet_fc_add_port(struct nvmet_port *port) list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - /* a FC port can only be 1 nvmet port id */ - if (!tgtport->port) { - tgtport->port = port; - port->priv = tgtport; - nvmet_fc_tgtport_get(tgtport); - ret = 0; - } else - ret = -EALREADY; + tgtport->port = port; + ret = 0; break; } } @@ -2508,19 +2502,7 @@ nvmet_fc_add_port(struct nvmet_port *port) static void nvmet_fc_remove_port(struct nvmet_port *port) { - struct nvmet_fc_tgtport *tgtport = port->priv; - unsigned long flags; - bool matched = false; - - spin_lock_irqsave(&nvmet_fc_tgtlock, flags); - if (tgtport->port == port) { - matched = true; - tgtport->port = NULL; - } - spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); - - if (matched) - nvmet_fc_tgtport_put(tgtport); + /* nothing to do */ } static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { -- cgit v1.2.3-55-g7522 From 0de5cd367c6aa2a31a1c931628f778f79f8ef22e Mon Sep 17 00:00:00 2001 From: Roy Shterman Date: Mon, 25 Dec 2017 14:18:30 +0200 Subject: nvme-fabrics: protect against module unload during create_ctrl NVMe transport driver module unload may (and usually does) trigger iteration over the active controllers and delete them all (sometimes under a mutex). However, a controller can be created concurrently with module unload which can lead to leakage of resources (most important char device node leakage) in case the controller creation occured after the unload delete and drain sequence. To protect against this, we take a module reference to guarantee that the nvme transport driver is not unloaded while creating a controller. Signed-off-by: Roy Shterman Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 17 +++++++++++++---- drivers/nvme/host/fabrics.h | 2 ++ drivers/nvme/host/fc.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/target/loop.c | 1 + 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 76b4fe6816a0..2f68befd31bf 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -492,7 +492,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect); */ int nvmf_register_transport(struct nvmf_transport_ops *ops) { - if (!ops->create_ctrl) + if (!ops->create_ctrl || !ops->module) return -EINVAL; down_write(&nvmf_transports_rwsem); @@ -868,32 +868,41 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_unlock; } + if (!try_module_get(ops->module)) { + ret = -EBUSY; + goto out_unlock; + } + ret = nvmf_check_required_opts(opts, ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS | ops->allowed_opts | ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ctrl = ops->create_ctrl(dev, opts); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); - goto out_unlock; + goto out_module_put; } if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subsys->subnqn); + module_put(ops->module); up_read(&nvmf_transports_rwsem); nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); } + module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; +out_module_put: + module_put(ops->module); out_unlock: up_read(&nvmf_transports_rwsem); out_free_opts: diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 9ba614953607..25b19f722f5b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -108,6 +108,7 @@ struct nvmf_ctrl_options { * fabric implementation of NVMe fabrics. * @entry: Used by the fabrics library to add the new * registration entry to its linked-list internal tree. + * @module: Transport module reference * @name: Name of the NVMe fabric driver implementation. * @required_opts: sysfs command-line options that must be specified * when adding a new NVMe controller. @@ -126,6 +127,7 @@ struct nvmf_ctrl_options { */ struct nvmf_transport_ops { struct list_head entry; + struct module *module; const char *name; int required_opts; int allowed_opts; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0a8af4daef89..2a7a9a75105d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3381,6 +3381,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 37af56596be6..75d6956eb380 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2006,6 +2006,7 @@ out_free_ctrl: static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 1e21b286f299..fdfcc961029f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -686,6 +686,7 @@ static struct nvmet_fabrics_ops nvme_loop_ops = { static struct nvmf_transport_ops nvme_loop_transport = { .name = "loop", + .module = THIS_MODULE, .create_ctrl = nvme_loop_create_ctrl, }; -- cgit v1.2.3-55-g7522 From 6fbcde6691b514faa963c60f5537332530f1bf0a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Tue, 5 Dec 2017 05:23:54 +0900 Subject: nvme-pci: remove an unnecessary initialization in HMB code The local variable __size__ will be set a bit later in a for-loop. Remove the explicit initialization at the beginning of this function. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f5800c3c9082..35331fa0013c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1770,7 +1770,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, dma_addr_t descs_dma; int i = 0; void **bufs; - u64 size = 0, tmp; + u64 size, tmp; tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); @@ -1853,7 +1853,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) u64 preferred = (u64)dev->ctrl.hmpre * 4096; u64 min = (u64)dev->ctrl.hmmin * 4096; u32 enable_bits = NVME_HOST_MEM_ENABLE; - int ret = 0; + int ret; preferred = min(preferred, max); if (min > max) { -- cgit v1.2.3-55-g7522 From eca19dc1d84d924544dda0c8d2fd4bb4131affeb Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 13 Nov 2017 12:29:40 +0000 Subject: nvmet: fix error flow in nvmet_alloc_ctrl() Remove the allocated id on error. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b54748ad5f48..07eb45d32a7a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -830,7 +830,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, /* Don't accept keep-alive timeout for discovery controllers */ if (kato) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_free_sqs; + goto out_remove_ida; } /* @@ -860,6 +860,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, *ctrlp = ctrl; return 0; +out_remove_ida: + ida_simple_remove(&cntlid_ida, ctrl->cntlid); out_free_sqs: kfree(ctrl->sqs); out_free_cqs: -- cgit v1.2.3-55-g7522 From 6b1943af3f4329c814ec7a651121746d08e6c9ee Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 13 Nov 2017 12:29:41 +0000 Subject: nvmet: rearrange nvmet_ctrl_free() Make it symmetric to nvmet_alloc_ctrl(). Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 07eb45d32a7a..7282ea8d3b96 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -879,21 +879,22 @@ static void nvmet_ctrl_free(struct kref *ref) struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); struct nvmet_subsys *subsys = ctrl->subsys; - nvmet_stop_keep_alive_timer(ctrl); - mutex_lock(&subsys->lock); list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); + nvmet_stop_keep_alive_timer(ctrl); + flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); ida_simple_remove(&cntlid_ida, ctrl->cntlid); - nvmet_subsys_put(subsys); kfree(ctrl->sqs); kfree(ctrl->cqs); kfree(ctrl); + + nvmet_subsys_put(subsys); } void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) -- cgit v1.2.3-55-g7522 From 4caff8fc19f10ffb06f095a9cf5a9e755377112e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 31 Dec 2017 14:01:19 +0200 Subject: nvme-pci: don't open-code nvme_reset_ctrl Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 35331fa0013c..add7b18d825d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2498,10 +2498,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; - nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING); dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - queue_work(nvme_wq, &dev->ctrl.reset_work); + nvme_reset_ctrl(&dev->ctrl); + return 0; release_pools: -- cgit v1.2.3-55-g7522 From 1a3838d732eaae47385490de88d978d4132d3d84 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 31 Dec 2017 15:33:27 +0200 Subject: nvme: modify the debug level for setting shutdown timeout When an NVMe controller reports RTD3 Entry Latency larger than the value of shutdown_timeout module parameter, we update the shutdown_timeout accordingly to honor RTD3 Entry Latency. Use an informational debug level instead of a warning level for it. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f837d666cbd4..2a69d735efbc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2252,7 +2252,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) shutdown_timeout, 60); if (ctrl->shutdown_timeout != shutdown_timeout) - dev_warn(ctrl->device, + dev_info(ctrl->device, "Shutdown timeout set to %u seconds\n", ctrl->shutdown_timeout); } else -- cgit v1.2.3-55-g7522 From 2b1b7e784a63f5ded4dda804e05e3f34b3880b25 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Sat, 6 Jan 2018 08:01:58 +0800 Subject: nvme-pci: fix NULL pointer reference in nvme_alloc_ns When the io queues setup or tagset allocation failed, ctrl.tagset is NULL. But the scan work will still be queued and executed, then panic comes up due to NULL pointer reference of ctrl.tagset. To fix this, add a new ctrl state NVME_CTRL_ADMIN_ONLY to inidcate only admin queue is live. When non io queues or tagset allocation failed, ctrl enters into this state, scan work will not be started. But async event work and nvme dev ioctl will be still available. This will be helpful to do further investigation and recovery. Suggested-by: Sagi Grimberg Signed-off-by: Jianchao Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 25 ++++++++++++++++++++++--- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 30 +++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2a69d735efbc..609307ca9e4d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -232,6 +232,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, old_state = ctrl->state; switch (new_state) { + case NVME_CTRL_ADMIN_ONLY: + switch (old_state) { + case NVME_CTRL_RESETTING: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; case NVME_CTRL_LIVE: switch (old_state) { case NVME_CTRL_NEW: @@ -247,6 +256,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: changed = true; /* FALLTHRU */ default: @@ -266,6 +276,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: switch (old_state) { case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: case NVME_CTRL_RESETTING: case NVME_CTRL_RECONNECTING: changed = true; @@ -2336,8 +2347,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - if (ctrl->state != NVME_CTRL_LIVE) + switch (ctrl->state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: + break; + default: return -EWOULDBLOCK; + } + file->private_data = ctrl; return 0; } @@ -2601,6 +2618,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_ADMIN_ONLY] = "only-admin", [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_RECONNECTING]= "reconnecting", [NVME_CTRL_DELETING] = "deleting", @@ -3073,6 +3091,8 @@ static void nvme_scan_work(struct work_struct *work) if (ctrl->state != NVME_CTRL_LIVE) return; + WARN_ON_ONCE(!ctrl->tagset); + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3093,8 +3113,7 @@ static void nvme_scan_work(struct work_struct *work) void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* - * Do not queue new scan work when a controller is reset during - * removal. + * Only new queue scan work when admin and IO queues are both alive */ if (ctrl->state == NVME_CTRL_LIVE) queue_work(nvme_wq, &ctrl->scan_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ea1aa5283e8e..eecf71ce6e75 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -119,6 +119,7 @@ static inline struct nvme_request *nvme_req(struct request *req) enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, + NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */ NVME_CTRL_RESETTING, NVME_CTRL_RECONNECTING, NVME_CTRL_DELETING, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index add7b18d825d..62119078c2bf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2035,13 +2035,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) } /* - * Return: error value if an error occurred setting up the queues or calling - * Identify Device. 0 if these succeeded, even if adding some of the - * namespaces failed. At the moment, these failures are silent. TBD which - * failures should be reported. + * return error value only when tagset allocation failed */ static int nvme_dev_add(struct nvme_dev *dev) { + int ret; + if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; @@ -2057,8 +2056,12 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.driver_data = dev; - if (blk_mq_alloc_tag_set(&dev->tagset)) - return 0; + ret = blk_mq_alloc_tag_set(&dev->tagset); + if (ret) { + dev_warn(dev->ctrl.device, + "IO queues tagset allocation failed %d\n", ret); + return ret; + } dev->ctrl.tagset = &dev->tagset; nvme_dbbuf_set(dev); @@ -2291,6 +2294,7 @@ static void nvme_reset_work(struct work_struct *work) container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result = -ENODEV; + enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) goto out; @@ -2354,15 +2358,23 @@ static void nvme_reset_work(struct work_struct *work) dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); + new_state = NVME_CTRL_ADMIN_ONLY; } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - nvme_dev_add(dev); + /* hit this only when allocate tagset fails */ + if (nvme_dev_add(dev)) + new_state = NVME_CTRL_ADMIN_ONLY; nvme_unfreeze(&dev->ctrl); } - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { - dev_warn(dev->ctrl.device, "failed to mark controller live\n"); + /* + * If only admin queue live, keep it to do further investigation or + * recovery. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { + dev_warn(dev->ctrl.device, + "failed to mark controller state %d\n", new_state); goto out; } -- cgit v1.2.3-55-g7522 From 85088c4a0f65f0be25a98164ec6bca02ac5cad04 Mon Sep 17 00:00:00 2001 From: Nitzan Carmi Date: Thu, 4 Jan 2018 17:56:13 +0200 Subject: nvme: take refcount on transport module The block device is backed by the transport so we must ensure that the transport driver will not be removed until all references are released. Otherwise, we might end up referencing freed memory. Reviewed-by: Max Gurtovoy Signed-off-by: Nitzan Carmi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 609307ca9e4d..c8bcfe64e976 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1228,16 +1228,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) #ifdef CONFIG_NVME_MULTIPATH /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(ns->head->disk)) - return -ENXIO; + goto fail; #endif if (!kref_get_unless_zero(&ns->kref)) - return -ENXIO; + goto fail; + if (!try_module_get(ns->ctrl->ops->module)) + goto fail_put_ns; + return 0; + +fail_put_ns: + nvme_put_ns(ns); +fail: + return -ENXIO; } static void nvme_release(struct gendisk *disk, fmode_t mode) { - nvme_put_ns(disk->private_data); + struct nvme_ns *ns = disk->private_data; + + module_put(ns->ctrl->ops->module); + nvme_put_ns(ns); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- cgit v1.2.3-55-g7522 From b837b28394fb76993c28bb242db7061ee0417da6 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 4 Jan 2018 17:56:14 +0200 Subject: nvme: fix subsystem multiple controllers support check There is a problem when another module (e.g. nvmet) takes a reference on the nvme block device and the physical nvme drive is removed. In that case nvme_free_ctrl() will not be called and the controller state will be "deleting" or "dead" unless nvmet module releases the block device. Later on, the same nvme drive probes back and nvme_init_subsystem() will be called and fail due to duplicate subnqn (if the nvme device doesn't support subsystem with multiple controllers). This will cause a probe failure. This commit changes the check of multiple controllers support at nvme_init_subsystem() by not counting all the controllers at "dead" or "deleting" state (this is safe because controllers at this state will never be active again). Fixes: ab9e00cc72fa ("nvme: track subsystems") Reviewed-by: Max Gurtovoy Signed-off-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c8bcfe64e976..2bcd49584f71 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2069,6 +2069,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static int nvme_active_ctrls(struct nvme_subsystem *subsys) +{ + int count = 0; + struct nvme_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (ctrl->state != NVME_CTRL_DELETING && + ctrl->state != NVME_CTRL_DEAD) + count++; + } + mutex_unlock(&subsys->lock); + + return count; +} + static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { struct nvme_subsystem *subsys, *found; @@ -2107,7 +2123,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (!(id->cmic & (1 << 1))) { + if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); -- cgit v1.2.3-55-g7522