From af73623f5f10eb3832c87a169b28f7df040a875b Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Mon, 23 Sep 2013 14:47:32 +0200 Subject: [SCSI] sd: Reduce buffer size for vpd request Somehow older areca firmware versions have issues with scsi_get_vpd_page() and a large buffer, the firmware seems to crash and the scsi error-handler will start endless recovery retries. Limiting the buf-size to 64-bytes fixes this issue with older firmware versions (<1.49 for my controller). Fixes a regression with areca controllers and older firmware versions introduced by commit: 66c28f97120e8a621afd5aa7a31c4b85c547d33d Reported-by: Nix Tested-by: Nix Signed-off-by: Bernd Schubert Cc: stable@vger.kernel.org # delay inclusion for 2 months for testing Acked-by: Martin K. Petersen Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5693f6d7eddb..ab96b793f904 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2639,13 +2639,16 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) struct scsi_device *sdev = sdkp->device; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) { + /* too large values might cause issues with arcmsr */ + int vpd_buf_len = 64; + sdev->no_report_opcodes = 1; /* Disable WRITE SAME if REPORT SUPPORTED OPERATION * CODES is unsupported and the device has an ATA * Information VPD page (SAT). */ - if (!scsi_get_vpd_page(sdev, 0x89, buffer, SD_BUF_SIZE)) + if (!scsi_get_vpd_page(sdev, 0x89, buffer, vpd_buf_len)) sdev->no_write_same = 1; } -- cgit v1.2.3-55-g7522 From 95897910a5b8ecdc7e86ca2c38e21e84324c98bd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 16 Sep 2013 13:28:15 +0200 Subject: [SCSI] sd: Add error handling during flushing caches It makes no sense to flush the cache of a device without medium. Errors during suspend must be handled according to their causes. Errors due to missing media or unplugged devices must be ignored. Errors due to devices being offlined must also be ignored. The error returns must be modified so that the generic layer understands them. [jejb: fix up whitespace and other formatting problems] Signed-off-by: Oliver Neukum Acked-by: Alan Stern Signed-off-by: James Bottomley --- drivers/scsi/scsi_pm.c | 3 ++- drivers/scsi/sd.c | 70 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 59 insertions(+), 14 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index 4c5aabe21755..af4c050ce6e4 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -54,7 +54,8 @@ scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *)) /* * All the high-level SCSI drivers that implement runtime * PM treat runtime suspend, system suspend, and system - * hibernate identically. + * hibernate nearly identically. In all cases the requirements + * for runtime suspension are stricter. */ if (pm_runtime_suspended(dev)) return 0; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ab96b793f904..3824e754ca1d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -105,7 +105,8 @@ static void sd_unlock_native_capacity(struct gendisk *disk); static int sd_probe(struct device *); static int sd_remove(struct device *); static void sd_shutdown(struct device *); -static int sd_suspend(struct device *); +static int sd_suspend_system(struct device *); +static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); static int sd_done(struct scsi_cmnd *); @@ -484,11 +485,11 @@ static struct class sd_disk_class = { }; static const struct dev_pm_ops sd_pm_ops = { - .suspend = sd_suspend, + .suspend = sd_suspend_system, .resume = sd_resume, - .poweroff = sd_suspend, + .poweroff = sd_suspend_system, .restore = sd_resume, - .runtime_suspend = sd_suspend, + .runtime_suspend = sd_suspend_runtime, .runtime_resume = sd_resume, }; @@ -1438,7 +1439,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp) if (!scsi_device_online(sdp)) return -ENODEV; - for (retries = 3; retries > 0; --retries) { unsigned char cmd[10] = { 0 }; @@ -1456,12 +1456,31 @@ static int sd_sync_cache(struct scsi_disk *sdkp) if (res) { sd_print_result(sdkp, res); + if (driver_byte(res) & DRIVER_SENSE) sd_print_sense_hdr(sdkp, &sshdr); + /* we need to evaluate the error return */ + if (scsi_sense_valid(&sshdr) && + /* 0x3a is medium not present */ + sshdr.asc == 0x3a) + /* this is no error here */ + return 0; + + switch (host_byte(res)) { + /* ignore errors due to racing a disconnection */ + case DID_BAD_TARGET: + case DID_NO_CONNECT: + return 0; + /* signal the upper layer it might try again */ + case DID_BUS_BUSY: + case DID_IMM_RETRY: + case DID_REQUEUE: + case DID_SOFT_ERROR: + return -EBUSY; + default: + return -EIO; + } } - - if (res) - return -EIO; return 0; } @@ -3061,9 +3080,17 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) sd_print_result(sdkp, res); if (driver_byte(res) & DRIVER_SENSE) sd_print_sense_hdr(sdkp, &sshdr); + if (scsi_sense_valid(&sshdr) && + /* 0x3a is medium not present */ + sshdr.asc == 0x3a) + res = 0; } - return res; + /* SCSI error codes must not go to the generic layer */ + if (res) + return -EIO; + + return 0; } /* @@ -3081,7 +3108,7 @@ static void sd_shutdown(struct device *dev) if (pm_runtime_suspended(dev)) goto exit; - if (sdkp->WCE) { + if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); sd_sync_cache(sdkp); } @@ -3095,7 +3122,7 @@ exit: scsi_disk_put(sdkp); } -static int sd_suspend(struct device *dev) +static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) { struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); int ret = 0; @@ -3103,16 +3130,23 @@ static int sd_suspend(struct device *dev) if (!sdkp) return 0; /* this can happen */ - if (sdkp->WCE) { + if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp); - if (ret) + if (ret) { + /* ignore OFFLINE device */ + if (ret == -ENODEV) + ret = 0; goto done; + } } if (sdkp->device->manage_start_stop) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); + if (ignore_stop_errors) + ret = 0; } done: @@ -3120,6 +3154,16 @@ done: return ret; } +static int sd_suspend_system(struct device *dev) +{ + return sd_suspend_common(dev, true); +} + +static int sd_suspend_runtime(struct device *dev) +{ + return sd_suspend_common(dev, false); +} + static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); -- cgit v1.2.3-55-g7522 From 7e660100d85af860e7ad763202fff717adcdaacd Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 4 Oct 2013 21:42:24 +0000 Subject: [SCSI] Derive the FLUSH_TIMEOUT from the basic I/O timeout Rather than having a separate constant for specifying the timeout on FLUSH operations, use the basic I/O timeout value that is already configurable on a per target basis to derive the FLUSH timeout. Looking at the current definitions of these timeout values, the FLUSH operation is supposed to have a value that is twice the normal timeout value. This patch preserves this relationship while leveraging the flexibility of specifying the I/O timeout. Based on a prior patch by KY Srinivasan Reviewed-by: KY Srinivasan Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 8 +++++--- drivers/scsi/sd.h | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/scsi/sd.c') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3824e754ca1d..fd874b846dc1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -830,7 +830,7 @@ static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq) static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) { - rq->timeout = SD_FLUSH_TIMEOUT; + rq->timeout *= SD_FLUSH_TIMEOUT_MULTIPLIER; rq->retries = SD_MAX_RETRIES; rq->cmd[0] = SYNCHRONIZE_CACHE; rq->cmd_len = 10; @@ -1434,6 +1434,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp) { int retries, res; struct scsi_device *sdp = sdkp->device; + const int timeout = sdp->request_queue->rq_timeout + * SD_FLUSH_TIMEOUT_MULTIPLIER; struct scsi_sense_hdr sshdr; if (!scsi_device_online(sdp)) @@ -1448,8 +1450,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp) * flush everything. */ res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, - &sshdr, SD_FLUSH_TIMEOUT, - SD_MAX_RETRIES, NULL, REQ_PM); + &sshdr, timeout, SD_MAX_RETRIES, + NULL, REQ_PM); if (res == 0) break; } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 7a049de22051..26895ff247c5 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -13,7 +13,11 @@ */ #define SD_TIMEOUT (30 * HZ) #define SD_MOD_TIMEOUT (75 * HZ) -#define SD_FLUSH_TIMEOUT (60 * HZ) +/* + * Flush timeout is a multiplier over the standard device timeout which is + * user modifiable via sysfs but initially set to SD_TIMEOUT + */ +#define SD_FLUSH_TIMEOUT_MULTIPLIER 2 #define SD_WRITE_SAME_TIMEOUT (120 * HZ) /* -- cgit v1.2.3-55-g7522