From 869b2b444c58302e3233ce0b671fabf28135a37d Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Wed, 9 May 2007 11:01:20 +0200 Subject: [SCSI] zfcp: avoid clutter in erp_dbf avoid clutter in erp_dbf cleanup zfcp_fsf_req_dismiss functions: - avoid clutter in erp_dbf (reqs_active is always 0) - fold called three-line function into calling function - add meaningful comment - coding style Signed-off-by: Martin Peschke Signed-off-by: Swen Schillig Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index a8b02542ac2d..0eb31e162b15 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -156,44 +156,30 @@ zfcp_fsf_req_free(struct zfcp_fsf_req *fsf_req) kfree(fsf_req); } -/** - * zfcp_fsf_req_dismiss - dismiss a single fsf request - */ -static void zfcp_fsf_req_dismiss(struct zfcp_adapter *adapter, - struct zfcp_fsf_req *fsf_req, - unsigned int counter) -{ - u64 dbg_tmp[2]; - - dbg_tmp[0] = (u64) atomic_read(&adapter->reqs_active); - dbg_tmp[1] = (u64) counter; - debug_event(adapter->erp_dbf, 4, (void *) dbg_tmp, 16); - list_del(&fsf_req->list); - fsf_req->status |= ZFCP_STATUS_FSFREQ_DISMISSED; - zfcp_fsf_req_complete(fsf_req); -} - -/** - * zfcp_fsf_req_dismiss_all - dismiss all remaining fsf requests +/* + * Never ever call this without shutting down the adapter first. + * Otherwise the adapter would continue using and corrupting s390 storage. + * Included BUG_ON() call to ensure this is done. + * ERP is supposed to be the only user of this function. */ void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *adapter) { - struct zfcp_fsf_req *request, *tmp; + struct zfcp_fsf_req *fsf_req, *tmp; unsigned long flags; LIST_HEAD(remove_queue); - unsigned int i, counter; + unsigned int i; + BUG_ON(atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status)); spin_lock_irqsave(&adapter->req_list_lock, flags); atomic_set(&adapter->reqs_active, 0); - for (i=0; ireq_list[i], &remove_queue); - spin_unlock_irqrestore(&adapter->req_list_lock, flags); - counter = 0; - list_for_each_entry_safe(request, tmp, &remove_queue, list) { - zfcp_fsf_req_dismiss(adapter, request, counter); - counter++; + list_for_each_entry_safe(fsf_req, tmp, &remove_queue, list) { + list_del(&fsf_req->list); + fsf_req->status |= ZFCP_STATUS_FSFREQ_DISMISSED; + zfcp_fsf_req_complete(fsf_req); } } -- cgit v1.2.3-55-g7522 From 9f28745a6b554fdd6b0dbc9856077701a55f9569 Mon Sep 17 00:00:00 2001 From: Michael Loehr Date: Wed, 9 May 2007 11:01:24 +0200 Subject: [SCSI] zfcp: IO stall after deleting and path checker changes after reenabling zfcp devices IO stall after deleting and path checker changes after reenabling zfcp device Setting one zfcp device offline using chccwdev in a multipath environment and waiting will lead to IO stall on all paths. After setting the zfcp device back online using chccwdev, the devices with io stall will have a different path checker. Devices corresponding to the deleted units are never freed. This has the effect that 'slave_destroy' is never called and zfcp still thinks that this unit is registered (ZFCP_STATUS_UNIT_REGISTERED is still set). Hence the erp routine is not called correctly and the unit is not enabled properly. Do not delete rport and the sdev. Just set the host to block on 'offline'. Setting host online again will then remove the blocked status and everything is fine again. Signed-off-by: Michael Loehr Signed-off-by: Swen Schillig Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_aux.c | 1 + drivers/s390/scsi/zfcp_ccw.c | 5 +---- drivers/s390/scsi/zfcp_scsi.c | 3 +++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index ddff40c4212c..821cde65e369 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1127,6 +1127,7 @@ zfcp_adapter_dequeue(struct zfcp_adapter *adapter) int retval = 0; unsigned long flags; + zfcp_adapter_scsi_unregister(adapter); device_unregister(&adapter->generic_services); zfcp_sysfs_adapter_remove_files(&adapter->ccw_device->dev); dev_set_drvdata(&adapter->ccw_device->dev, NULL); diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 81680efa1721..1c8f71a59855 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -189,9 +189,7 @@ zfcp_ccw_set_online(struct ccw_device *ccw_device) * @ccw_device: pointer to belonging ccw device * * This function gets called by the common i/o layer and sets an adapter - * into state offline. Setting an fcp device offline means that it will be - * unregistered from the SCSI stack and that the adapter will be shut down - * asynchronously. + * into state offline. */ static int zfcp_ccw_set_offline(struct ccw_device *ccw_device) @@ -202,7 +200,6 @@ zfcp_ccw_set_offline(struct ccw_device *ccw_device) adapter = dev_get_drvdata(&ccw_device->dev); zfcp_erp_adapter_shutdown(adapter, 0); zfcp_erp_wait(adapter); - zfcp_adapter_scsi_unregister(adapter); zfcp_erp_thread_kill(adapter); zfcp_adapter_debug_unregister(adapter); up(&zfcp_data.config_sema); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 16e2d64658af..0acf6db0a08d 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -569,6 +569,9 @@ zfcp_adapter_scsi_register(struct zfcp_adapter *adapter) int retval = 0; static unsigned int unique_id = 0; + if (adapter->scsi_host) + goto out; + /* register adapter as SCSI host with mid layer of SCSI stack */ adapter->scsi_host = scsi_host_alloc(&zfcp_data.scsi_host_template, sizeof (struct zfcp_adapter *)); -- cgit v1.2.3-55-g7522 From 5af23d263c33a3f6fc93facfd87d2a091eff2060 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 9 May 2007 15:36:35 -0500 Subject: [SCSI] ipr: Proper return codes for eh_dev_reset for SATA devices Currently ipr always returns success from eh_dev_reset when called for a SATA device. If ata_do_eh is unable to recover for some reason, this can result in commands that are still outstanding when ata_do_eh returns. Change ipr to verify no commands are outstanding before returning success. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 4baa79e68679..fa6ff295e568 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3954,6 +3954,13 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd * scsi_cmd) spin_unlock_irq(scsi_cmd->device->host->host_lock); ata_do_eh(ap, NULL, NULL, ipr_sata_reset, NULL); spin_lock_irq(scsi_cmd->device->host->host_lock); + + list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->cfgte.res_handle) { + rc = -EIO; + break; + } + } } else rc = ipr_device_reset(ioa_cfg, res); res->resetting_device = 0; -- cgit v1.2.3-55-g7522 From e0b2e597d5dd8c4f3778545f65c29a9c6aba0e3a Mon Sep 17 00:00:00 2001 From: Ed Lin Date: Wed, 9 May 2007 20:50:33 -0800 Subject: [SCSI] stex: fix id mapping issue The correct internal mapping of stex controllers should be: id:0~15, lun:0~7 (st_shasta) id:0, lun:0~127 (st_yosemite) id:0~127, lun:0 (st_vsc and st_vsc1) This patch reports the internal mapping to scsi mid layer, eliminating the translation between scsi mid layer and firmware. To achieve this goal, we also need to: -- fail the REPORT_LUNS command for st_shasta because the firmware is known to not report all actual luns -- add an entry in scsi_devindo.c to force sequential lun scan (for st_shasta controllers) -- fail the REPORT_LUNS command for console device -- remove special handling of REPORT_LUNS command for st_yosemite, as there is no translation mapping now Signed-off-by: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/stex.c | 60 +++++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index ce63044b1ec8..18dd5cc4d7c6 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -209,6 +209,7 @@ static struct { {"PIONEER", "CD-ROM DRM-602X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-604X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, + {"Promise", "", NULL, BLIST_SPARSELUN}, {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN}, {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN}, {"SEAGATE", "ST34555N", "0930", BLIST_NOTQ}, /* Chokes on tagged INQUIRY */ diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 69be1324b114..96dcbac9545e 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -113,10 +113,6 @@ enum { SG_CF_64B = 0x40, /* 64 bit item */ SG_CF_HOST = 0x20, /* sg in host memory */ - ST_MAX_ARRAY_SUPPORTED = 16, - ST_MAX_TARGET_NUM = (ST_MAX_ARRAY_SUPPORTED+1), - ST_MAX_LUN_PER_TARGET = 16, - st_shasta = 0, st_vsc = 1, st_vsc1 = 2, @@ -586,7 +582,7 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) u16 tag; host = cmd->device->host; id = cmd->device->id; - lun = cmd->device->channel; /* firmware lun issue work around */ + lun = cmd->device->lun; hba = (struct st_hba *) &host->hostdata[0]; switch (cmd->cmnd[0]) { @@ -605,8 +601,19 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) stex_invalid_field(cmd, done); return 0; } + case REPORT_LUNS: + /* + * The shasta firmware does not report actual luns in the + * target, so fail the command to force sequential lun scan. + * Also, the console device does not support this command. + */ + if (hba->cardtype == st_shasta || id == host->max_id - 1) { + stex_invalid_field(cmd, done); + return 0; + } + break; case INQUIRY: - if (id != ST_MAX_ARRAY_SUPPORTED) + if (id != host->max_id - 1) break; if (lun == 0 && (cmd->cmnd[1] & INQUIRY_EVPD) == 0) { stex_direct_copy(cmd, console_inq_page, @@ -624,7 +631,7 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) ver.oem = ST_OEM; ver.build = ST_BUILD_VER; ver.signature[0] = PASSTHRU_SIGNATURE; - ver.console_id = ST_MAX_ARRAY_SUPPORTED; + ver.console_id = host->max_id - 1; ver.host_no = hba->host->host_no; cmd->result = stex_direct_copy(cmd, &ver, sizeof(ver)) ? DID_OK << 16 | COMMAND_COMPLETE << 8 : @@ -645,13 +652,8 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) req = stex_alloc_req(hba); - if (hba->cardtype == st_yosemite) { - req->lun = lun * (ST_MAX_TARGET_NUM - 1) + id; - req->target = 0; - } else { - req->lun = lun; - req->target = id; - } + req->lun = lun; + req->target = id; /* cdb */ memcpy(req->cdb, cmd->cmnd, STEX_CDB_LENGTH); @@ -767,18 +769,6 @@ static void stex_ys_commands(struct st_hba *hba, ccb->srb_status = SRB_STATUS_SELECTION_TIMEOUT; else ccb->srb_status = SRB_STATUS_SUCCESS; - } else if (ccb->cmd->cmnd[0] == REPORT_LUNS) { - u8 *report_lun_data = (u8 *)hba->copy_buffer; - - count = STEX_EXTRA_SIZE; - stex_internal_copy(ccb->cmd, report_lun_data, - &count, ccb->sg_count, ST_FROM_CMD); - if (report_lun_data[2] || report_lun_data[3]) { - report_lun_data[2] = 0x00; - report_lun_data[3] = 0x08; - stex_internal_copy(ccb->cmd, report_lun_data, - &count, ccb->sg_count, ST_TO_CMD); - } } } @@ -1229,12 +1219,18 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) hba->copy_buffer = hba->dma_mem + MU_BUFFER_SIZE; hba->mu_status = MU_STATE_STARTING; - /* firmware uses id/lun pair for a logical drive, but lun would be - always 0 if CONFIG_SCSI_MULTI_LUN not configured, so we use - channel to map lun here */ - host->max_channel = ST_MAX_LUN_PER_TARGET - 1; - host->max_id = ST_MAX_TARGET_NUM; - host->max_lun = 1; + if (hba->cardtype == st_shasta) { + host->max_lun = 8; + host->max_id = 16 + 1; + } else if (hba->cardtype == st_yosemite) { + host->max_lun = 128; + host->max_id = 1 + 1; + } else { + /* st_vsc and st_vsc1 */ + host->max_lun = 1; + host->max_id = 128 + 1; + } + host->max_channel = 0; host->unique_id = host->host_no; host->max_cmd_len = STEX_CDB_LENGTH; -- cgit v1.2.3-55-g7522 From 69f4a513911455670d3322fb5252b437c0485707 Mon Sep 17 00:00:00 2001 From: Ed Lin Date: Wed, 9 May 2007 20:50:37 -0800 Subject: [SCSI] stex: extend hard reset wait time During hard bus reset of st_shasta controllers, 1 ms is not enough for 16-port controllers, although it's good for 8-port controllers. Extend the wait time to 100 ms to allow bus resets finish successfully. Signed-off-by: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 96dcbac9545e..81dd3b740daf 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -1041,7 +1041,12 @@ static void stex_hard_reset(struct st_hba *hba) pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl); pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl); - msleep(1); + + /* + * 1 ms may be enough for 8-port controllers. But 16-port controllers + * require more time to finish bus reset. Use 100 ms here for safety + */ + msleep(100); pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl); -- cgit v1.2.3-55-g7522 From d116a7bc6ab4dcf752078daeaf2276f7d7660595 Mon Sep 17 00:00:00 2001 From: Ed Lin Date: Wed, 9 May 2007 20:50:40 -0800 Subject: [SCSI] stex: fix reset recovery for console device After reset completed, the scsi error handler sends out TEST_UNIT_READY to the device. For 'normal' devices the command will be handled by firmware. However, because the RAID console only interfaces to scsi mid layer, the firmware will not process the command for it. This will make the console to be offlined right after reset. Add the handling in driver to fix this problem. Signed-off-by: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 81dd3b740daf..47c2ef917fea 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -612,6 +612,13 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) return 0; } break; + case TEST_UNIT_READY: + if (id == host->max_id - 1) { + cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8; + done(cmd); + return 0; + } + break; case INQUIRY: if (id != host->max_id - 1) break; -- cgit v1.2.3-55-g7522 From c25da0afa753c29cd99fb41dc73a33ed69556965 Mon Sep 17 00:00:00 2001 From: Ed Lin Date: Wed, 9 May 2007 20:50:42 -0800 Subject: [SCSI] stex: minor cleanup and version update Add debug information into abort and host_reset routine. Change ioremap to ioremap_nocache. Version updated to 3.6.0000.1. Signed-off-by: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 47c2ef917fea..9ac83abc4028 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -32,11 +32,12 @@ #include #include #include +#include #define DRV_NAME "stex" -#define ST_DRIVER_VERSION "3.1.0.1" +#define ST_DRIVER_VERSION "3.6.0000.1" #define ST_VER_MAJOR 3 -#define ST_VER_MINOR 1 +#define ST_VER_MINOR 6 #define ST_OEM 0 #define ST_BUILD_VER 1 @@ -992,6 +993,11 @@ static int stex_abort(struct scsi_cmnd *cmd) u32 data; int result = SUCCESS; unsigned long flags; + + printk(KERN_INFO DRV_NAME + "(%s): aborting command\n", pci_name(hba->pdev)); + scsi_print_command(cmd); + base = hba->mmio_base; spin_lock_irqsave(host->host_lock, flags); if (tag < host->can_queue && hba->ccb[tag].cmd == cmd) @@ -1077,6 +1083,10 @@ static int stex_reset(struct scsi_cmnd *cmd) unsigned long before; hba = (struct st_hba *) &cmd->device->host->hostdata[0]; + printk(KERN_INFO DRV_NAME + "(%s): resetting host\n", pci_name(hba->pdev)); + scsi_print_command(cmd); + hba->mu_status = MU_STATE_RESETTING; if (hba->cardtype == st_shasta) @@ -1196,7 +1206,7 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_scsi_host_put; } - hba->mmio_base = ioremap(pci_resource_start(pdev, 0), + hba->mmio_base = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if ( !hba->mmio_base) { printk(KERN_ERR DRV_NAME "(%s): memory map failed\n", -- cgit v1.2.3-55-g7522 From bdd0d7571ac83190ec0caafb86d0d35ba2616fd2 Mon Sep 17 00:00:00 2001 From: Sumant Patro Date: Thu, 10 May 2007 07:22:35 -0700 Subject: [SCSI] MegaRAID: Update MAINTAINERS email-id Update Maintainer email-id for MegaRAID SCSI drivers. Signed-off-by: Sumant Patro Signed-off-by: James Bottomley --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bbeb5b6b5b05..f6b89dff7d0d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2322,7 +2322,7 @@ S: Maintained MEGARAID SCSI DRIVERS P: Neela Syam Kolli -M: Neela.Kolli@engenio.com +M: megaraidlinux@lsi.com S: linux-scsi@vger.kernel.org W: http://megaraid.lsilogic.com S: Maintained -- cgit v1.2.3-55-g7522 From bcd4e22540309f2f135f278cffe134c63dbcaee5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 11 May 2007 19:10:45 +0900 Subject: [SCSI] tgt: fix a rdma indirect transfer error bug This sets sg_dma_len to a proper value. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- drivers/scsi/libsrp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 5631c199a8eb..732446e63963 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -254,6 +254,7 @@ static int srp_indirect_data(struct scsi_cmnd *sc, struct srp_cmd *cmd, sg_init_one(&dummy, md, id->table_desc.len); sg_dma_address(&dummy) = token; + sg_dma_len(&dummy) = id->table_desc.len; err = rdma_io(sc, &dummy, 1, &id->table_desc, 1, DMA_TO_DEVICE, id->table_desc.len); if (err) { -- cgit v1.2.3-55-g7522 From ed56047aecaddc0a45bb23b61ce933fb81f67ef9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 May 2007 22:48:31 +0200 Subject: [SCSI] NCR53C9x: correct spelling mistake in deprecation notice On Mon, Apr 30, 2007 at 03:16:39PM +0200, Geert Uytterhoeven wrote: > > +What: old NCR53C9x driver > > +When: October 2007 > > +Why: Replaced by the much better esp_scsi driver. Actual low-level > > + driver can ported over almost trivially. > ^ > be current linus' tree still has my spelling mistake. Here's a patch to update it Signed-off-by: James Bottomley --- Documentation/feature-removal-schedule.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 498ff31f3aa1..adc84d521988 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -62,7 +62,7 @@ Who: Dan Dennedy , Stefan Richter What: old NCR53C9x driver When: October 2007 Why: Replaced by the much better esp_scsi driver. Actual low-level - driver can ported over almost trivially. + driver can be ported over almost trivially. Who: David Miller Christoph Hellwig -- cgit v1.2.3-55-g7522 From 2ab01efd1d2a24db53b4c5d28a2e20cf2b1206c5 Mon Sep 17 00:00:00 2001 From: Salyzyn, Mark Date: Tue, 15 May 2007 09:14:21 -0400 Subject: [SCSI] aacraid: Correct sa platform support. (Was: [Bug 8469] Bad EIP value on pentium3 SMP kernel-2.6.21.1) http://bugzilla.kernel.org/show_bug.cgi?id=8469 As discussed in the bugzilla outlined below, we have an sa based (Mustang) RAID adapter on the system, a Dell PERC2/QC. Affected controllers are HP NetRAID, Adaptec AAC-364, Dell PERC2/QC or Adaptec 5400S. This problem coincides with the introduction of the adapter_comm and adapter_deliver platform functions (Message [PATCH 1/4] aacraid: rework communication support code, January 23 2007, which initially migrated to 2.6.21) The panic occurs with an uninitialized adapter_deliver platform function pointer. The enclosed patch, unmodified as tested by Rainer, solves the problem. Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/rx.c | 2 +- drivers/scsi/aacraid/sa.c | 9 ++++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 45ca3e801619..42311a7d7337 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1823,6 +1823,7 @@ int aac_send_shutdown(struct aac_dev *dev); int aac_probe_container(struct aac_dev *dev, int cid); int _aac_rx_init(struct aac_dev *dev); int aac_rx_select_comm(struct aac_dev *dev, int comm); +int aac_rx_deliver_producer(struct fib * fib); extern int numacb; extern int acbsize; extern char aac_driver_version[]; diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 291cd14f4e98..0d25853eb061 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -378,7 +378,7 @@ static int aac_rx_check_health(struct aac_dev *dev) * * Will send a fib, returning 0 if successful. */ -static int aac_rx_deliver_producer(struct fib * fib) +int aac_rx_deliver_producer(struct fib * fib) { struct aac_dev *dev = fib->dev; struct aac_queue *q = &dev->queues->queue[AdapNormCmdQueue]; diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c index f4b5e9742ab0..85b91bc578c9 100644 --- a/drivers/scsi/aacraid/sa.c +++ b/drivers/scsi/aacraid/sa.c @@ -5,7 +5,7 @@ * based on the old aacraid driver that is.. * Adaptec aacraid device driver for Linux. * - * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) + * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -257,6 +257,11 @@ static void aac_sa_start_adapter(struct aac_dev *dev) NULL, NULL, NULL, NULL, NULL); } +static int aac_sa_restart_adapter(struct aac_dev *dev, int bled) +{ + return -EINVAL; +} + /** * aac_sa_check_health * @dev: device to check if healthy @@ -366,7 +371,9 @@ int aac_sa_init(struct aac_dev *dev) dev->a_ops.adapter_notify = aac_sa_notify_adapter; dev->a_ops.adapter_sync_cmd = sa_sync_cmd; dev->a_ops.adapter_check_health = aac_sa_check_health; + dev->a_ops.adapter_restart = aac_sa_restart_adapter; dev->a_ops.adapter_intr = aac_sa_intr; + dev->a_ops.adapter_deliver = aac_rx_deliver_producer; dev->a_ops.adapter_ioremap = aac_sa_ioremap; /* -- cgit v1.2.3-55-g7522 From cab537d609fb718e9fb09d73e3e3e3062db25743 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 16 May 2007 10:06:39 -0400 Subject: [SCSI] aacraid: fix panic on short Inquiry Unable to handle kernel paging request at ffff8101c0000000 RIP: [] :aacraid:aac_internal_transfer+0xd6/0xe3 PGD 8063 PUD 0 Oops: 0000 [1] SMP last sysfs file: /block/sdb/removable CPU 2 Modules linked in: autofs4(U) hidp(U) nfs(U) lockd(U) fscache(U) nfs_acl(U) rfcomm(U) l2cap(U) bluetooth(U) sunrpc(U) ipv6(U) cpufreq_ondemand(U) dm_mirror(U) dm_mod(U) video(U) sbs(U) i2c_ec(U) button(U) battery(U) asus_acpi(U) acpi_memhotplug(U) ac(U) parport_pc(U) lp(U) parport(U) joydev(U) ide_cd(U) i2c_i801(U) i2c_core(U) shpchp(U) cdrom(U) bnx2(U) sg(U) pcspkr(U) ata_piix(U) libata(U) aacraid(U) sd_mod(U) scsi_mod(U) ext3(U) jbd(U) ehci_hcd(U) ohci_hcd(U) uhci_hcd(U) Pid: 2352, comm: syslogd Not tainted 2.6.18-prep #1 RIP: 0010:[] [] :aacraid:aac_internal_transfer+0xd6/0xe3 RSP: 0000:ffff8101bfd1fe68 EFLAGS: 00010083 RAX: 0000000000000063 RBX: 0000000000000008 RCX: 00000000ffd1fea0 RDX: ffffffff802da628 RSI: ffff8101c0000000 RDI: ffff8101b2a08168 RBP: ffff8101b2728010 R08: ffffffff802da628 R09: 0000000000000046 R10: 0000000000000000 R11: 0000000000000080 R12: 0000000000000010 R13: ffff8101bfd1fea8 R14: ffff8101bc74df58 R15: ffff8101bc74df58 FS: 00002aaaab0146f0(0000) GS:ffff8101bfcd2e40(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff8101c0000000 CR3: 00000001bdecd000 CR4: 00000000000006e0 Process syslogd (pid: 2352, threadinfo ffff8101bc74c000, task ffff8101bd979040) Stack: 0000000000000012 0000000000000036 0000000000000000 ffff8101bee9a800 ffff8101be9d3a00 ffff8101be9d3a00 ffff8101be8014f8 ffffffff880b26cc 40212227607e3141 2029282a26252423 0000000000000003 ffff810037e3a000 Call Trace: ] :aacraid:get_container_name_callback+0x8b/0xb5 [] :aacraid:aac_intr_normal+0x1b3/0x1f9 [] :aacraid:aac_rkt_intr+0x37/0x115 [] __rcu_process_callbacks+0xf8/0x1a8 [] handle_IRQ_event+0x29/0x58 [] __do_IRQ+0xa4/0x105 [] __do_softirq+0x5e/0xd5 [] do_IRQ+0xe7/0xf5 [] ret_from_intr+0x0/0xa On digging into it, it turned out that the customer was probing an aacraid device with an INQUIRY of 8 bytes. The way aacraid works, it was blindly trying to use aac_internal_transfer to copy the container name to byte 16 of the inquiry data, resulting in a negative transfer length. It then copies over the whole of kernel memory before dropping off the end. Fix updated and corrected by Mark Salyzyn Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 1e82c69b36b0..6c4319c98fee 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -340,7 +340,7 @@ int aac_get_containers(struct aac_dev *dev) static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigned int offset, unsigned int len) { void *buf; - unsigned int transfer_len; + int transfer_len; struct scatterlist *sg = scsicmd->request_buffer; if (scsicmd->use_sg) { @@ -351,7 +351,7 @@ static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigne transfer_len = min(scsicmd->request_bufflen, len + offset); } transfer_len -= offset; - if (buf && transfer_len) + if (buf && transfer_len > 0) memcpy(buf + offset, data, transfer_len); if (scsicmd->use_sg) -- cgit v1.2.3-55-g7522 From 09ff92fea2890c697a36d8b26f5a3ea725ef8fb4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 May 2007 09:55:04 -0400 Subject: [SCSI] sd: fix refcounting regression in suspend/resume routines This patch (as909) fixes a couple of refcounting errors in the sd driver's suspend and resume methods. Signed-off-by: Alan Stern Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 00e46662296f..3d8c9cb24f91 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1789,7 +1789,7 @@ static void sd_shutdown(struct device *dev) static int sd_suspend(struct device *dev, pm_message_t mesg) { struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); - int ret; + int ret = 0; if (!sdkp) return 0; /* this can happen */ @@ -1798,30 +1798,34 @@ static int sd_suspend(struct device *dev, pm_message_t mesg) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp); if (ret) - return ret; + goto done; } if (mesg.event == PM_EVENT_SUSPEND && sdkp->device->manage_start_stop) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); ret = sd_start_stop_device(sdkp, 0); - if (ret) - return ret; } - return 0; +done: + scsi_disk_put(sdkp); + return ret; } static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); + int ret = 0; if (!sdkp->device->manage_start_stop) - return 0; + goto done; sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + ret = sd_start_stop_device(sdkp, 1); - return sd_start_stop_device(sdkp, 1); +done: + scsi_disk_put(sdkp); + return ret; } /** -- cgit v1.2.3-55-g7522 From 1208bab5d07c9a9172f04b76dc107c37507a9bb3 Mon Sep 17 00:00:00 2001 From: Salyzyn, Mark Date: Tue, 22 May 2007 09:32:29 -0400 Subject: [SCSI] aacraid: apply commit config for reset_devices flag Under some conditions associated with the unclean transition to kdump, the aacraid adapters will view the array as foreign and not export it to prevent access and data manipulation. The solution is to submit a commit configuration to export the devices since this is a expected behavior when transitioning to a kdump kernel. This patch adds the aacraid.reset_devices flag and when either this or the global reset_devices flag is set, ensures that a commit config is issued and extends the startup_timeout if it is set less than 5 minutes. Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 11 +++++++---- drivers/scsi/aacraid/aacraid.h | 2 ++ drivers/scsi/aacraid/rx.c | 6 +++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 6c4319c98fee..8dcfe4ec35c2 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -146,7 +146,7 @@ static char *aac_get_status_string(u32 status); static int nondasd = -1; static int dacmode = -1; -static int commit = -1; +int aac_commit = -1; int startup_timeout = 180; int aif_timeout = 120; @@ -154,7 +154,7 @@ module_param(nondasd, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on"); module_param(dacmode, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(dacmode, "Control whether dma addressing is using 64 bit DAC. 0=off, 1=on"); -module_param(commit, int, S_IRUGO|S_IWUSR); +module_param_named(commit, aac_commit, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(commit, "Control whether a COMMIT_CONFIG is issued to the adapter for foreign arrays.\nThis is typically needed in systems that do not have a BIOS. 0=off, 1=on"); module_param(startup_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(startup_timeout, "The duration of time in seconds to wait for adapter to have it's kernel up and\nrunning. This is typically adjusted for large systems that do not have a BIOS."); @@ -173,6 +173,9 @@ int expose_physicals = -1; module_param(expose_physicals, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); +int aac_reset_devices = 0; +module_param_named(reset_devices, aac_reset_devices, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(reset_devices, "Force an adapter reset at initialization."); static inline int aac_valid_context(struct scsi_cmnd *scsicmd, struct fib *fibptr) { @@ -246,7 +249,7 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag) aac_fib_complete(fibptr); /* Send a CT_COMMIT_CONFIG to enable discovery of devices */ if (status >= 0) { - if ((commit == 1) || commit_flag) { + if ((aac_commit == 1) || commit_flag) { struct aac_commit_config * dinfo; aac_fib_init(fibptr); dinfo = (struct aac_commit_config *) fib_data(fibptr); @@ -261,7 +264,7 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag) 1, 1, NULL, NULL); aac_fib_complete(fibptr); - } else if (commit == 0) { + } else if (aac_commit == 0) { printk(KERN_WARNING "aac_get_config_status: Foreign device configurations are being ignored\n"); } diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 42311a7d7337..c81edf36913f 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1830,3 +1830,5 @@ extern char aac_driver_version[]; extern int startup_timeout; extern int aif_timeout; extern int expose_physicals; +extern int aac_reset_devices; +extern int aac_commit; diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 0d25853eb061..ae978a373c56 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -488,6 +488,8 @@ static int aac_rx_restart_adapter(struct aac_dev *dev, int bled) return -EINVAL; if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) return -ENODEV; + if (startup_timeout < 300) + startup_timeout = 300; return 0; } @@ -542,7 +544,7 @@ int _aac_rx_init(struct aac_dev *dev) dev->a_ops.adapter_sync_cmd = rx_sync_cmd; dev->a_ops.adapter_enable_int = aac_rx_disable_interrupt; dev->OIMR = status = rx_readb (dev, MUnit.OIMR); - if ((((status & 0x0c) != 0x0c) || reset_devices) && + if ((((status & 0x0c) != 0x0c) || aac_reset_devices || reset_devices) && !aac_rx_restart_adapter(dev, 0)) ++restart; /* @@ -594,6 +596,8 @@ int _aac_rx_init(struct aac_dev *dev) } msleep(1); } + if (restart) + aac_commit = 1; /* * Fill in the common function dispatch table. */ -- cgit v1.2.3-55-g7522 From f45ffaec2e51071ea0067849cbb84df9e0531b35 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 22 May 2007 09:26:22 -0500 Subject: [SCSI] aic7xxx: fix aicasm build failure with gcc-3.4.6 On Tue, 2007-05-22 at 06:51 -0500, Bob Tracy wrote: > Second try: originally reported this back on April 17th. 2.6.X > kernel builds started failing after I upgraded my compiler from > gcc-3.3.X to gcc-3.4.6: > > make -C drivers/scsi/aic7xxx/aicasm > (...) > gcc -I/usr/include -I. aicasm.c aicasm_symbol.c aicasm_gram.c aicasm_macro_gram.c aicasm_scan.c aicasm_macro_scan.c -o aicasm -ldb > aicasm_gram.y:1948: error: conflicting types for 'yyerror' > aicasm_gram.tab.c:3004: error: previous implicit declaration of 'yyerror' was here > aicasm_macro_gram.y:162: error: conflicting types for 'mmerror' > aicasm_macro_gram.tab.c:1196: error: previous implicit declaration of 'mmerror' was here Fix is to add a prototype for yyerror and mmerror to the relevant files. Signed-off-by: James Bottomley --- drivers/scsi/aic7xxx/aicasm/aicasm_gram.y | 1 + drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y index c328596def3c..6066998ed562 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y @@ -106,6 +106,7 @@ static void make_expression(expression_t *immed, int value); static void add_conditional(symbol_t *symbol); static void add_version(const char *verstring); static int is_download_const(expression_t *immed); +void yyerror(const char *string); #define SRAM_SYMNAME "SRAM_BASE" #define SCB_SYMNAME "SCB_BASE" diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y b/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y index 439f760b34b5..ff46aa6801bf 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y @@ -65,6 +65,7 @@ static symbol_t *macro_symbol; static void add_macro_arg(const char *argtext, int position); +void mmerror(const char *string); %} -- cgit v1.2.3-55-g7522 From 8fdcf86af61bfba744f5868ec04dad71637ac33a Mon Sep 17 00:00:00 2001 From: Darrick J. Wong Date: Wed, 16 May 2007 14:01:48 -0700 Subject: [SCSI] aic94xx: asd_clear_nexus should fail if the cleared task does not complete Every so often, the driver will call asd_clear_nexus to clean out a task. It is supposed to be the case that the CLEAR NEXUS does not go on the done list until after the task itself has been put on the done list, but for some reason this doesn't always happen. Thus, the wait_for_completion_timeout call times out, and we return success. This makes libsas free the task even though the task hasn't completed, leading to a BUG_ON message from aic94xx_hwi.c around line 341. We should return failure from asd_clear_nexus so that libsas tries again; at a bare minimum it shouldn't be freeing active tasks. I _think_ this will fix one of the SCB timeout crash problems (though I've not been able to reproduce it lately...) Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_tmf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c b/drivers/scsi/aic94xx/aic94xx_tmf.c index 9a14a6d97275..c0d0b7d7a8ce 100644 --- a/drivers/scsi/aic94xx/aic94xx_tmf.c +++ b/drivers/scsi/aic94xx/aic94xx_tmf.c @@ -290,6 +290,7 @@ static void asd_tmf_tasklet_complete(struct asd_ascb *ascb, static inline int asd_clear_nexus(struct sas_task *task) { int res = TMF_RESP_FUNC_FAILED; + int leftover; struct asd_ascb *tascb = task->lldd_task; unsigned long flags; @@ -298,10 +299,12 @@ static inline int asd_clear_nexus(struct sas_task *task) res = asd_clear_nexus_tag(task); else res = asd_clear_nexus_index(task); - wait_for_completion_timeout(&tascb->completion, - AIC94XX_SCB_TIMEOUT); + leftover = wait_for_completion_timeout(&tascb->completion, + AIC94XX_SCB_TIMEOUT); ASD_DPRINTK("came back from clear nexus\n"); spin_lock_irqsave(&task->task_state_lock, flags); + if (leftover < 1) + res = TMF_RESP_FUNC_FAILED; if (task->task_state_flags & SAS_TASK_STATE_DONE) res = TMF_RESP_FUNC_COMPLETE; spin_unlock_irqrestore(&task->task_state_lock, flags); @@ -350,6 +353,7 @@ int asd_abort_task(struct sas_task *task) unsigned long flags; struct asd_ascb *ascb = NULL; struct scb *scb; + int leftover; spin_lock_irqsave(&task->task_state_lock, flags); if (task->task_state_flags & SAS_TASK_STATE_DONE) { @@ -455,9 +459,11 @@ int asd_abort_task(struct sas_task *task) break; case TF_TMF_TASK_DONE + 0xFF00: /* done but not reported yet */ res = TMF_RESP_FUNC_FAILED; - wait_for_completion_timeout(&tascb->completion, - AIC94XX_SCB_TIMEOUT); + leftover = wait_for_completion_timeout(&tascb->completion, + AIC94XX_SCB_TIMEOUT); spin_lock_irqsave(&task->task_state_lock, flags); + if (leftover < 1) + res = TMF_RESP_FUNC_FAILED; if (task->task_state_flags & SAS_TASK_STATE_DONE) res = TMF_RESP_FUNC_COMPLETE; spin_unlock_irqrestore(&task->task_state_lock, flags); -- cgit v1.2.3-55-g7522 From 3d9780b97667fcd63159c0933fdce75465da6c70 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 21 May 2007 20:59:47 -0400 Subject: [SCSI] fusion: Fix |/|| confusion There are several cases where the fusion driver uses the logical || to try to do an arithmetical or ... fix by replacing with |. Signed-off-by: Dave Jones Acked-by: "Moore, Eric" Signed-off-by: James Bottomley --- drivers/message/fusion/mptbase.h | 2 +- drivers/message/fusion/mptscsih.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h index d25d3be8fcd2..165f81d16d00 100644 --- a/drivers/message/fusion/mptbase.h +++ b/drivers/message/fusion/mptbase.h @@ -436,7 +436,7 @@ typedef struct _MPT_SAS_MGMT { typedef struct _mpt_ioctl_events { u32 event; /* Specified by define above */ u32 eventContext; /* Index or counter */ - int data[2]; /* First 8 bytes of Event Data */ + u32 data[2]; /* First 8 bytes of Event Data */ } MPT_IOCTL_EVENTS; /* diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index fa0f7761652a..3bd94f11e7d6 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -2463,11 +2463,11 @@ mptscsih_copy_sense_data(struct scsi_cmnd *sc, MPT_SCSI_HOST *hd, MPT_FRAME_HDR ioc->events[idx].event = MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE; ioc->events[idx].eventContext = ioc->eventContext; - ioc->events[idx].data[0] = (pReq->LUN[1] << 24) || - (MPI_EVENT_SCSI_DEV_STAT_RC_SMART_DATA << 16) || - (sc->device->channel << 8) || sc->device->id; + ioc->events[idx].data[0] = (pReq->LUN[1] << 24) | + (MPI_EVENT_SCSI_DEV_STAT_RC_SMART_DATA << 16) | + (sc->device->channel << 8) | sc->device->id; - ioc->events[idx].data[1] = (sense_data[13] << 8) || sense_data[12]; + ioc->events[idx].data[1] = (sense_data[13] << 8) | sense_data[12]; ioc->eventContext++; if (hd->ioc->pcidev->vendor == -- cgit v1.2.3-55-g7522 From 05e9ebbefb379a4da782b21b8427c88ac28a2334 Mon Sep 17 00:00:00 2001 From: Sumant Patro Date: Thu, 17 May 2007 05:47:51 -0700 Subject: [SCSI] megaraid_sas: intercept cmd timeout and throttle io eh_timed_out call back (megasas_reset_timer) is used to throttle io to the adapter when it is called the first time for a scmd. The MEGASAS_FW_BUSY flag is set and can_queue reduced to 16. The can_queue is restored from completion routine in following two conditions : 5 seconds has elapsed and the # of outstanding cmds in FW is < 17. Signed-off-by: Sumant Patro Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 67 ++++++++++++++++++++++++++++++++---- drivers/scsi/megaraid/megaraid_sas.h | 14 +++++--- 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 7a812677ff8a..e2cf12ef3688 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. * * FILE : megaraid_sas.c - * Version : v00.00.03.10-rc1 + * Version : v00.00.03.10-rc5 * * Authors: * (email-id : megaraidlinux@lsi.com) @@ -886,6 +886,7 @@ megasas_queue_command(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *)) goto out_return_cmd; cmd->scmd = scmd; + scmd->SCp.ptr = (char *)cmd; /* * Issue the command to the FW @@ -919,7 +920,7 @@ static int megasas_slave_configure(struct scsi_device *sdev) * The RAID firmware may require extended timeouts. */ if (sdev->channel >= MEGASAS_MAX_PD_CHANNELS) - sdev->timeout = 90 * HZ; + sdev->timeout = MEGASAS_DEFAULT_CMD_TIMEOUT * HZ; return 0; } @@ -981,8 +982,8 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) instance = (struct megasas_instance *)scmd->device->host->hostdata; - scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x\n", - scmd->serial_number, scmd->cmnd[0]); + scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x retries=%x\n", + scmd->serial_number, scmd->cmnd[0], scmd->retries); if (instance->hw_crit_error) { printk(KERN_ERR "megasas: cannot recover from previous reset " @@ -999,6 +1000,39 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) return ret_val; } +/** + * megasas_reset_timer - quiesce the adapter if required + * @scmd: scsi cmnd + * + * Sets the FW busy flag and reduces the host->can_queue if the + * cmd has not been completed within the timeout period. + */ +static enum +scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +{ + struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; + struct megasas_instance *instance; + unsigned long flags; + + if (time_after(jiffies, scmd->jiffies_at_alloc + + (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { + return EH_NOT_HANDLED; + } + + instance = cmd->instance; + if (!(instance->flag & MEGASAS_FW_BUSY)) { + /* FW is busy, throttle IO */ + spin_lock_irqsave(instance->host->host_lock, flags); + + instance->host->can_queue = 16; + instance->last_time = jiffies; + instance->flag |= MEGASAS_FW_BUSY; + + spin_unlock_irqrestore(instance->host->host_lock, flags); + } + return EH_RESET_TIMER; +} + /** * megasas_reset_device - Device reset handler entry point */ @@ -1112,6 +1146,7 @@ static struct scsi_host_template megasas_template = { .eh_device_reset_handler = megasas_reset_device, .eh_bus_reset_handler = megasas_reset_bus_host, .eh_host_reset_handler = megasas_reset_bus_host, + .eh_timed_out = megasas_reset_timer, .bios_param = megasas_bios_param, .use_clustering = ENABLE_CLUSTERING, }; @@ -1215,9 +1250,8 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, int exception = 0; struct megasas_header *hdr = &cmd->frame->hdr; - if (cmd->scmd) { - cmd->scmd->SCp.ptr = (char *)0; - } + if (cmd->scmd) + cmd->scmd->SCp.ptr = NULL; switch (hdr->cmd) { @@ -1806,6 +1840,7 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) u32 context; struct megasas_cmd *cmd; struct megasas_instance *instance = (struct megasas_instance *)instance_addr; + unsigned long flags; /* If we have already declared adapter dead, donot complete cmds */ if (instance->hw_crit_error) @@ -1828,6 +1863,22 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) } *instance->consumer = producer; + + /* + * Check if we can restore can_queue + */ + if (instance->flag & MEGASAS_FW_BUSY + && time_after(jiffies, instance->last_time + 5 * HZ) + && atomic_read(&instance->fw_outstanding) < 17) { + + spin_lock_irqsave(instance->host->host_lock, flags); + instance->flag &= ~MEGASAS_FW_BUSY; + instance->host->can_queue = + instance->max_fw_cmds - MEGASAS_INT_CMDS; + + spin_unlock_irqrestore(instance->host->host_lock, flags); + } + } /** @@ -2398,6 +2449,8 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) instance->init_id = MEGASAS_DEFAULT_INIT_ID; megasas_dbg_lvl = 0; + instance->flag = 0; + instance->last_time = 0; /* * Initialize MFI Firmware diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index e862992ee377..4dffc918a414 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -18,9 +18,9 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "00.00.03.10-rc1" -#define MEGASAS_RELDATE "Feb 14, 2007" -#define MEGASAS_EXT_VERSION "Wed Feb 14 10:14:25 PST 2007" +#define MEGASAS_VERSION "00.00.03.10-rc5" +#define MEGASAS_RELDATE "May 17, 2007" +#define MEGASAS_EXT_VERSION "Thu May 17 10:09:32 PDT 2007" /* * Device IDs @@ -539,6 +539,8 @@ struct megasas_ctrl_info { #define MEGASAS_DBG_LVL 1 +#define MEGASAS_FW_BUSY 1 + /* * When SCSI mid-layer calls driver's reset routine, driver waits for * MEGASAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note @@ -549,8 +551,8 @@ struct megasas_ctrl_info { #define MEGASAS_RESET_WAIT_TIME 180 #define MEGASAS_INTERNAL_CMD_WAIT_TIME 180 #define MEGASAS_RESET_NOTICE_INTERVAL 5 - #define MEGASAS_IOCTL_CMD 0 +#define MEGASAS_DEFAULT_CMD_TIMEOUT 90 /* * FW reports the maximum of number of commands that it can accept (maximum @@ -1073,7 +1075,6 @@ struct megasas_instance { struct megasas_register_set __iomem *reg_set; s8 init_id; - u8 reserved[3]; u16 max_num_sge; u16 max_fw_cmds; @@ -1104,6 +1105,9 @@ struct megasas_instance { struct megasas_instance_template *instancet; struct tasklet_struct isr_tasklet; + + u8 flag; + unsigned long last_time; }; #define MEGASAS_IS_LOGICAL(scp) \ -- cgit v1.2.3-55-g7522