summaryrefslogtreecommitdiffstats
path: root/drivers/target/target_core_transport.c
diff options
context:
space:
mode:
authorNicholas Bellinger2017-02-23 07:06:32 +0100
committerNicholas Bellinger2017-02-27 01:08:44 +0100
commitbd4e2d2907fa23a11d46217064ecf80470ddae10 (patch)
treef03b7ab77fe4378f15fe011ec5cb8258aea5be38 /drivers/target/target_core_transport.c
parenttarget: Delete tmr from list before processing (diff)
downloadkernel-qcow2-linux-bd4e2d2907fa23a11d46217064ecf80470ddae10.tar.gz
kernel-qcow2-linux-bd4e2d2907fa23a11d46217064ecf80470ddae10.tar.xz
kernel-qcow2-linux-bd4e2d2907fa23a11d46217064ecf80470ddae10.zip
target: Fix NULL dereference during LUN lookup + active I/O shutdown
When transport_clear_lun_ref() is shutting down a se_lun via configfs with new I/O in-flight, it's possible to trigger a NULL pointer dereference in transport_lookup_cmd_lun() due to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking before incrementing lun->lun_ref.count after lun->lun_ref has switched to atomic_t mode. This results in a NULL pointer dereference as LUN shutdown code in core_tpg_remove_lun() continues running after the existing ->release() -> core_tpg_lun_ref_release() callback completes, and clears the RCU protected se_lun->lun_se_dev pointer. During the OOPs, the state of lun->lun_ref in the process which triggered the NULL pointer dereference looks like the following on v4.1.y stable code: struct se_lun { lun_link_magic = 4294932337, lun_status = TRANSPORT_LUN_STATUS_FREE, ..... lun_se_dev = 0x0, lun_sep = 0x0, ..... lun_ref = { count = { counter = 1 }, percpu_count_ptr = 3, release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>, confirm_switch = 0x0, force_atomic = false, rcu = { next = 0xffff88154fa1a5d0, func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu> } } } To address this bug, use percpu_ref_tryget_live() to ensure once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref has switched to atomic_t, all new I/Os will fail to obtain a new lun->lun_ref reference. Also use an explicit percpu_ref_kill_and_confirm() callback to block on ->lun_ref_comp to allow the first stage and associated RCU grace period to complete, and then block on ->lun_ref_shutdown waiting for the final percpu_ref_put() to drop the last reference via transport_lun_remove_cmd() before continuing with core_tpg_remove_lun() shutdown. Reported-by: Rob Millner <rlm@daterainc.com> Tested-by: Rob Millner <rlm@daterainc.com> Cc: Rob Millner <rlm@daterainc.com> Tested-by: Vaibhav Tandon <vst@datera.io> Cc: Vaibhav Tandon <vst@datera.io> Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com> Cc: <stable@vger.kernel.org> # v3.14+ Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Diffstat (limited to 'drivers/target/target_core_transport.c')
-rw-r--r--drivers/target/target_core_transport.c31
1 files changed, 30 insertions, 1 deletions
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index efb9e6f38201..434d9d693989 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2700,10 +2700,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
}
EXPORT_SYMBOL(target_wait_for_sess_cmds);
+static void target_lun_confirm(struct percpu_ref *ref)
+{
+ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
+
+ complete(&lun->lun_ref_comp);
+}
+
void transport_clear_lun_ref(struct se_lun *lun)
{
- percpu_ref_kill(&lun->lun_ref);
+ /*
+ * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
+ * the initial reference and schedule confirm kill to be
+ * executed after one full RCU grace period has completed.
+ */
+ percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
+ /*
+ * The first completion waits for percpu_ref_switch_to_atomic_rcu()
+ * to call target_lun_confirm after lun->lun_ref has been marked
+ * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
+ * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
+ * fails for all new incoming I/O.
+ */
wait_for_completion(&lun->lun_ref_comp);
+ /*
+ * The second completion waits for percpu_ref_put_many() to
+ * invoke ->release() after lun->lun_ref has switched to
+ * atomic_t mode, and lun->lun_ref.count has reached zero.
+ *
+ * At this point all target-core lun->lun_ref references have
+ * been dropped via transport_lun_remove_cmd(), and it's safe
+ * to proceed with the remaining LUN shutdown.
+ */
+ wait_for_completion(&lun->lun_shutdown_comp);
}
static bool