summaryrefslogtreecommitdiffstats
path: root/drivers/scsi/bnx2i/bnx2i_hwi.c
diff options
context:
space:
mode:
authorEddie Wai2011-06-24 00:51:34 +0200
committerJames Bottomley2011-06-29 23:48:23 +0200
commitb5cf6b63f73abdc051035f0050b367beeb2ef94c (patch)
treee627d05b8cca847e3364e198b49b985efbaf14b2 /drivers/scsi/bnx2i/bnx2i_hwi.c
parent[SCSI] libiscsi_tcp: fix LLD data allocation (diff)
downloadkernel-qcow2-linux-b5cf6b63f73abdc051035f0050b367beeb2ef94c.tar.gz
kernel-qcow2-linux-b5cf6b63f73abdc051035f0050b367beeb2ef94c.tar.xz
kernel-qcow2-linux-b5cf6b63f73abdc051035f0050b367beeb2ef94c.zip
[SCSI] bnx2i: Added the use of kthreads to handle SCSI cmd completion
This patch breaks the SCSI cmd completion into two parts: 1. The bh will allocate and queue work to the cmd-specific CPU IO completion kthread. The CPU for the cmd is taken from sc->request->cpu. 2. The CPU-specific IO completion kthread will call the scsi_cmd_resp routine to do the actual cmd completion. In the normal case, these IO completion kthreads should complete before the blk IO times out at 60s. However, in the case where these kthreads are blocked for whatever reason and exceed the timeout, the call to conn_destroy will have to iterate over and exhaust all related work in the percpu work list for all online CPUs. This guarantees the protection of the work->session and conn pointers before they get freed. Also modified the event coalescing formula to keep at least event_coal_min outstanding cmds in the pipeline so the SCSI producer does not get underrun. Also changed the following SCSI parameters: - can_queue from 1024 to 2048 - cmds_per_lun from 24 to 128 Signed-off-by: Eddie Wai <eddie.wai@broadcom.com> Acked-by: Benjamin Li <benli@broadcom.com> Acked-by: Michael Chan <mchan@broadcom.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi/bnx2i/bnx2i_hwi.c')
-rw-r--r--drivers/scsi/bnx2i/bnx2i_hwi.c183
1 files changed, 156 insertions, 27 deletions
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 550e6c4ea8b4..a501a72a243d 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -17,6 +17,8 @@
#include <scsi/libiscsi.h>
#include "bnx2i.h"
+DECLARE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
+
/**
* bnx2i_get_cid_num - get cid from ep
* @ep: endpoint pointer
@@ -131,16 +133,16 @@ static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code)
* the driver. EQ event is generated CQ index is hit or at least 1 CQ is
* outstanding and on chip timer expires
*/
-void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
+int bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
{
struct bnx2i_5771x_cq_db *cq_db;
u16 cq_index;
- u16 next_index;
+ u16 next_index = 0;
u32 num_active_cmds;
/* Coalesce CQ entries only on 10G devices */
if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
- return;
+ return 0;
/* Do not update CQ DB multiple times before firmware writes
* '0xFFFF' to CQDB->SQN field. Deviation may cause spurious
@@ -150,16 +152,17 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
if (action != CNIC_ARM_CQE_FP)
if (cq_db->sqn[0] && cq_db->sqn[0] != 0xFFFF)
- return;
+ return 0;
if (action == CNIC_ARM_CQE || action == CNIC_ARM_CQE_FP) {
- num_active_cmds = ep->num_active_cmds;
+ num_active_cmds = atomic_read(&ep->num_active_cmds);
if (num_active_cmds <= event_coal_min)
next_index = 1;
- else
- next_index = event_coal_min +
- ((num_active_cmds - event_coal_min) >>
- ep->ec_shift);
+ else {
+ next_index = num_active_cmds >> ep->ec_shift;
+ if (next_index > num_active_cmds - event_coal_min)
+ next_index = num_active_cmds - event_coal_min;
+ }
if (!next_index)
next_index = 1;
cq_index = ep->qp.cqe_exp_seq_sn + next_index - 1;
@@ -170,6 +173,7 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
cq_db->sqn[0] = cq_index;
}
+ return next_index;
}
@@ -265,7 +269,7 @@ static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count)
struct bnx2i_5771x_sq_rq_db *sq_db;
struct bnx2i_endpoint *ep = bnx2i_conn->ep;
- ep->num_active_cmds++;
+ atomic_inc(&ep->num_active_cmds);
wmb(); /* flush SQ WQE memory before the doorbell is rung */
if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
sq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.sq_pgtbl_virt;
@@ -1331,14 +1335,15 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
/**
* bnx2i_process_scsi_cmd_resp - this function handles scsi cmd completion.
- * @conn: iscsi connection
+ * @session: iscsi session
+ * @bnx2i_conn: bnx2i connection
* @cqe: pointer to newly DMA'ed CQE entry for processing
*
* process SCSI CMD Response CQE & complete the request to SCSI-ML
*/
-static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
- struct bnx2i_conn *bnx2i_conn,
- struct cqe *cqe)
+int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
+ struct bnx2i_conn *bnx2i_conn,
+ struct cqe *cqe)
{
struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
struct bnx2i_cmd_response *resp_cqe;
@@ -1348,7 +1353,7 @@ static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
u32 datalen = 0;
resp_cqe = (struct bnx2i_cmd_response *)cqe;
- spin_lock(&session->lock);
+ spin_lock_bh(&session->lock);
task = iscsi_itt_to_task(conn,
resp_cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
if (!task)
@@ -1409,7 +1414,7 @@ done:
__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr,
conn->data, datalen);
fail:
- spin_unlock(&session->lock);
+ spin_unlock_bh(&session->lock);
return 0;
}
@@ -1836,21 +1841,130 @@ static void bnx2i_process_cmd_cleanup_resp(struct iscsi_session *session,
}
+/**
+ * bnx2i_percpu_io_thread - kthread body that completes queued SCSI cmd CQEs
+ *
+ * @arg: ptr to the struct bnx2i_percpu_s whose work_list this thread drains
+ *
+ * Sets its own nice value to -20, then loops until kthread_should_stop().
+ * On each pass it splices p->work_list onto a local list while holding
+ * p->p_work_lock, releases the lock, and for every entry calls
+ * bnx2i_process_scsi_cmd_resp(), decrements the connection's work_cnt and
+ * frees the entry. Once the shared list is found empty, the task state is
+ * set to TASK_INTERRUPTIBLE while p_work_lock is still held; only then is
+ * the lock released and schedule() called.
+ * NOTE(review): presumably this ordering is what keeps a wake_up_process()
+ * issued by a producer holding the same lock from being missed - confirm.
+ *
+ * Returns 0 after kthread_should_stop() becomes true.
+ */
+int bnx2i_percpu_io_thread(void *arg)
+{
+ struct bnx2i_percpu_s *p = arg;
+ struct bnx2i_work *work, *tmp;
+ LIST_HEAD(work_list);
+
+ set_user_nice(current, -20);
+
+ while (!kthread_should_stop()) {
+ spin_lock_bh(&p->p_work_lock);
+ while (!list_empty(&p->work_list)) {
+ list_splice_init(&p->work_list, &work_list);
+ spin_unlock_bh(&p->p_work_lock);
+
+ list_for_each_entry_safe(work, tmp, &work_list, list) {
+ list_del_init(&work->list);
+ /* work allocated in the bh, freed here; the entries
+ * are processed with p_work_lock released */
+ bnx2i_process_scsi_cmd_resp(work->session,
+ work->bnx2i_conn,
+ &work->cqe);
+ atomic_dec(&work->bnx2i_conn->work_cnt);
+ kfree(work);
+ }
+ spin_lock_bh(&p->p_work_lock);
+ }
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_bh(&p->p_work_lock);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ return 0;
+}
+
+
+/**
+ * bnx2i_queue_scsi_cmd_resp - queue cmd completion to the percpu thread
+ * @session: iscsi session; its lock is held while the task is looked up
+ * @bnx2i_conn: bnx2i connection
+ * @cqe: CQE holding the cmd response; copied into the queued work item
+ *
+ * this function is called from bnx2i_process_new_cqes() for SCSI cmd
+ * response and data-in CQEs
+ *
+ * The task is looked up from the ITT carried in @cqe, and the work item is
+ * appended to the per-CPU work list selected by sc->request->cpu of that
+ * task's SCSI command; the list's iothread is then woken. If that per-CPU
+ * entry has no iothread, or the work item cannot be allocated, the response
+ * is processed inline through bnx2i_process_scsi_cmd_resp() instead.
+ *
+ * NOTE(review): task->sc is dereferenced after session->lock has been
+ * released and without a NULL check - confirm this cannot race with task
+ * completion. The earlier comment here promised "No out-of-order
+ * completion!"; that cannot be verified from this function alone.
+ *
+ * Returns 0 when the work was queued, -EINVAL when no task matches the ITT
+ * or the iothread is missing, -ENOMEM when the allocation fails.
+ */
+static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
+ struct bnx2i_conn *bnx2i_conn,
+ struct bnx2i_nop_in_msg *cqe)
+{
+ struct bnx2i_work *bnx2i_work = NULL;
+ struct bnx2i_percpu_s *p = NULL;
+ struct iscsi_task *task;
+ struct scsi_cmnd *sc;
+ int rc = 0;
+
+ spin_lock(&session->lock);
+ task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data,
+ cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
+ if (!task) {
+ spin_unlock(&session->lock);
+ return -EINVAL;
+ }
+ sc = task->sc;
+ spin_unlock(&session->lock);
+
+ p = &per_cpu(bnx2i_percpu, sc->request->cpu);
+ spin_lock(&p->p_work_lock);
+ if (unlikely(!p->iothread)) {
+ rc = -EINVAL;
+ goto err;
+ }
+ /* Allocate a work item (atomic context) and copy the cqe into it */
+ bnx2i_work = kzalloc(sizeof(struct bnx2i_work), GFP_ATOMIC);
+ if (bnx2i_work) {
+ INIT_LIST_HEAD(&bnx2i_work->list);
+ bnx2i_work->session = session;
+ bnx2i_work->bnx2i_conn = bnx2i_conn;
+ memcpy(&bnx2i_work->cqe, cqe, sizeof(struct cqe));
+ list_add_tail(&bnx2i_work->list, &p->work_list);
+ atomic_inc(&bnx2i_conn->work_cnt);
+ wake_up_process(p->iothread);
+ spin_unlock(&p->p_work_lock);
+ goto done;
+ } else
+ rc = -ENOMEM;
+err:
+ spin_unlock(&p->p_work_lock);
+ bnx2i_process_scsi_cmd_resp(session, bnx2i_conn, (struct cqe *)cqe);
+done:
+ return rc;
+}
+
/**
* bnx2i_process_new_cqes - process newly DMA'ed CQE's
- * @bnx2i_conn: iscsi connection
+ * @bnx2i_conn: bnx2i connection
*
* this function is called by generic KCQ handler to process all pending CQE's
*/
-static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
+static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
{
struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
struct iscsi_session *session = conn->session;
- struct qp_info *qp = &bnx2i_conn->ep->qp;
+ struct qp_info *qp;
struct bnx2i_nop_in_msg *nopin;
int tgt_async_msg;
+ int cqe_cnt = 0;
+ if (bnx2i_conn->ep == NULL)
+ return 0;
+
+ qp = &bnx2i_conn->ep->qp;
+
+ if (!qp->cq_virt) {
+ printk(KERN_ALERT "bnx2i (%s): cq resr freed in bh execution!",
+ bnx2i_conn->hba->netdev->name);
+ goto out;
+ }
while (1) {
nopin = (struct bnx2i_nop_in_msg *) qp->cq_cons_qe;
if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
@@ -1873,8 +1987,9 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
switch (nopin->op_code) {
case ISCSI_OP_SCSI_CMD_RSP:
case ISCSI_OP_SCSI_DATA_IN:
- bnx2i_process_scsi_cmd_resp(session, bnx2i_conn,
- qp->cq_cons_qe);
+ /* Run the kthread engine only for data cmds
+ All other cmds will be completed in this bh! */
+ bnx2i_queue_scsi_cmd_resp(session, bnx2i_conn, nopin);
break;
case ISCSI_OP_LOGIN_RSP:
bnx2i_process_login_resp(session, bnx2i_conn,
@@ -1918,13 +2033,21 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
nopin->op_code);
}
- if (!tgt_async_msg)
- bnx2i_conn->ep->num_active_cmds--;
+ if (!tgt_async_msg) {
+ if (!atomic_read(&bnx2i_conn->ep->num_active_cmds))
+ printk(KERN_ALERT "bnx2i (%s): no active cmd! "
+ "op 0x%x\n",
+ bnx2i_conn->hba->netdev->name,
+ nopin->op_code);
+ else
+ atomic_dec(&bnx2i_conn->ep->num_active_cmds);
+ }
cqe_out:
/* clear out in production version only, till beta keep opcode
* field intact, will be helpful in debugging (context dump)
* nopin->op_code = 0;
*/
+ cqe_cnt++;
qp->cqe_exp_seq_sn++;
if (qp->cqe_exp_seq_sn == (qp->cqe_size * 2 + 1))
qp->cqe_exp_seq_sn = ISCSI_INITIAL_SN;
@@ -1937,6 +2060,8 @@ cqe_out:
qp->cq_cons_idx++;
}
}
+out:
+ return cqe_cnt;
}
/**
@@ -1952,6 +2077,7 @@ static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
{
struct bnx2i_conn *bnx2i_conn;
u32 iscsi_cid;
+ int nxt_idx;
iscsi_cid = new_cqe_kcqe->iscsi_conn_id;
bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
@@ -1964,9 +2090,12 @@ static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid);
return;
}
+
bnx2i_process_new_cqes(bnx2i_conn);
- bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE_FP);
- bnx2i_process_new_cqes(bnx2i_conn);
+ nxt_idx = bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep,
+ CNIC_ARM_CQE_FP);
+ if (nxt_idx && nxt_idx == bnx2i_process_new_cqes(bnx2i_conn))
+ bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE_FP);
}
@@ -2312,7 +2441,7 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
printk(KERN_ALERT "bnx2i (%s): ofld1 cmpl - invalid "
"opcode\n", hba->netdev->name);
else if (ofld_kcqe->completion_status ==
- ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY)
+ ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY)
/* error status code valid only for 5771x chipset */
ep->state = EP_STATE_OFLD_FAILED_CID_BUSY;
else
@@ -2511,7 +2640,7 @@ static void bnx2i_cm_remote_abort(struct cnic_sock *cm_sk)
static int bnx2i_send_nl_mesg(void *context, u32 msg_type,
- char *buf, u16 buflen)
+ char *buf, u16 buflen)
{
struct bnx2i_hba *hba = context;
int rc;