summary | refs | log | tree | commit | diff | stats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig | 1
-rw-r--r-- drivers/block/cciss.c | 27
-rw-r--r-- drivers/block/cciss_scsi.c | 1
-rw-r--r-- drivers/block/drbd/drbd_int.h | 1
-rw-r--r-- drivers/block/drbd/drbd_main.c | 10
-rw-r--r-- drivers/block/drbd/drbd_receiver.c | 4
-rw-r--r-- drivers/block/loop.c | 84
-rw-r--r-- drivers/block/loop.h | 3
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c | 228
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h | 10
-rw-r--r-- drivers/block/nbd.c | 52
-rw-r--r-- drivers/block/null_blk.c | 14
-rw-r--r-- drivers/block/nvme-core.c | 818
-rw-r--r-- drivers/block/nvme-scsi.c | 1236
-rw-r--r-- drivers/block/paride/pd.c | 4
-rw-r--r-- drivers/block/pktcdvd.c | 1
-rw-r--r-- drivers/block/pmem.c | 4
-rw-r--r-- drivers/block/ps3vram.c | 34
-rw-r--r-- drivers/block/rbd.c | 5
-rw-r--r-- drivers/block/sx8.c | 4
-rw-r--r-- drivers/block/virtio_blk.c | 6
-rw-r--r-- drivers/block/xen-blkback/blkback.c | 35
-rw-r--r-- drivers/block/zram/Kconfig | 10
-rw-r--r-- drivers/block/zram/zcomp.c | 7
-rw-r--r-- drivers/block/zram/zcomp.h | 1
-rw-r--r-- drivers/block/zram/zram_drv.c | 948
-rw-r--r-- drivers/block/zram/zram_drv.h | 10
27 files changed, 1614 insertions, 1944 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index eb1fed5bd516..3ccef9eba6f9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX
config BLK_DEV_PMEM
tristate "Persistent memory block device support"
+ depends on HAS_IOMEM
help
Saying Y here will allow you to use a contiguous range of reserved
memory as one or more persistent block devices.
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ff20f192b0f6..0422c47261c3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -139,8 +139,6 @@ static struct board_type products[] = {
{0x3214103C, "Smart Array E200i", &SA5_access},
{0x3215103C, "Smart Array E200i", &SA5_access},
{0x3237103C, "Smart Array E500", &SA5_access},
- {0x3223103C, "Smart Array P800", &SA5_access},
- {0x3234103C, "Smart Array P400", &SA5_access},
{0x323D103C, "Smart Array P700m", &SA5_access},
};
@@ -574,8 +572,6 @@ static void cciss_procinit(ctlr_info_t *h)
/* List of controllers which cannot be hard reset on kexec with reset_devices */
static u32 unresettable_controller[] = {
- 0x324a103C, /* Smart Array P712m */
- 0x324b103C, /* SmartArray P711m */
0x3223103C, /* Smart Array P800 */
0x3234103C, /* Smart Array P400 */
0x3235103C, /* Smart Array P400i */
@@ -586,12 +582,32 @@ static u32 unresettable_controller[] = {
0x3215103C, /* Smart Array E200i */
0x3237103C, /* Smart Array E500 */
0x323D103C, /* Smart Array P700m */
+ 0x40800E11, /* Smart Array 5i */
0x409C0E11, /* Smart Array 6400 */
0x409D0E11, /* Smart Array 6400 EM */
+ 0x40700E11, /* Smart Array 5300 */
+ 0x40820E11, /* Smart Array 532 */
+ 0x40830E11, /* Smart Array 5312 */
+ 0x409A0E11, /* Smart Array 641 */
+ 0x409B0E11, /* Smart Array 642 */
+ 0x40910E11, /* Smart Array 6i */
};
/* List of controllers which cannot even be soft reset */
static u32 soft_unresettable_controller[] = {
+ 0x40800E11, /* Smart Array 5i */
+ 0x40700E11, /* Smart Array 5300 */
+ 0x40820E11, /* Smart Array 532 */
+ 0x40830E11, /* Smart Array 5312 */
+ 0x409A0E11, /* Smart Array 641 */
+ 0x409B0E11, /* Smart Array 642 */
+ 0x40910E11, /* Smart Array 6i */
+ /* Exclude 640x boards. These are two pci devices in one slot
+ * which share a battery backed cache module. One controls the
+ * cache, the other accesses the cache through the one that controls
+ * it. If we reset the one controlling the cache, the other will
+ * likely not be happy. Just forbid resetting this conjoined mess.
+ */
0x409C0E11, /* Smart Array 6400 */
0x409D0E11, /* Smart Array 6400 EM */
};
@@ -4667,8 +4683,7 @@ static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
*/
cciss_lookup_board_id(pdev, &board_id);
if (!ctlr_is_resettable(board_id)) {
- dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
- "due to shared cache module.");
+ dev_warn(&pdev->dev, "Controller not resettable\n");
return -ENODEV;
}
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index ecd845cd28d8..1537302e56e3 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
.show_info = cciss_scsi_show_info,
.queuecommand = cciss_scsi_queue_command,
.this_id = 7,
- .cmd_per_lun = 1,
.use_clustering = DISABLE_CLUSTERING,
/* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
.eh_device_reset_handler= cciss_eh_device_reset_handler,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b905e9888b88..efd19c2da9c2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <linux/major.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <net/tcp.h>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 81fde9ef7f8e..a1518539b858 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2359,7 +2359,7 @@ static void drbd_cleanup(void)
* @congested_data: User data
* @bdi_bits: Bits the BDI flusher thread is currently interested in
*
- * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ * Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
*/
static int drbd_congested(void *congested_data, int bdi_bits)
{
@@ -2376,14 +2376,14 @@ static int drbd_congested(void *congested_data, int bdi_bits)
}
if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
- r |= (1 << BDI_async_congested);
+ r |= (1 << WB_async_congested);
/* Without good local data, we would need to read from remote,
* and that would need the worker thread as well, which is
* currently blocked waiting for that usermode helper to
* finish.
*/
if (!get_ldev_if_state(device, D_UP_TO_DATE))
- r |= (1 << BDI_sync_congested);
+ r |= (1 << WB_sync_congested);
else
put_ldev(device);
r &= bdi_bits;
@@ -2399,9 +2399,9 @@ static int drbd_congested(void *congested_data, int bdi_bits)
reason = 'b';
}
- if (bdi_bits & (1 << BDI_async_congested) &&
+ if (bdi_bits & (1 << WB_async_congested) &&
test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
- r |= (1 << BDI_async_congested);
+ r |= (1 << WB_async_congested);
reason = reason == 'b' ? 'a' : 'n';
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index cee20354ac37..c097909c589c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
what = "sock_create_kern";
- err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
+ err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (err < 0) {
sock = NULL;
@@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
memcpy(&my_addr, &connection->my_addr, my_addr_len);
what = "sock_create_kern";
- err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
+ err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &s_listen);
if (err) {
s_listen = NULL;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ae3fcb4199e9..40580dc7f41c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex);
static int max_part;
static int part_shift;
-static struct workqueue_struct *loop_wq;
-
static int transfer_xor(struct loop_device *lo, int cmd,
struct page *raw_page, unsigned raw_off,
struct page *loop_page, unsigned loop_off,
@@ -476,6 +474,28 @@ static int loop_flush(struct loop_device *lo)
return loop_switch(lo, NULL);
}
+static void loop_reread_partitions(struct loop_device *lo,
+ struct block_device *bdev)
+{
+ int rc;
+
+ /*
+ * bd_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (!atomic_read(&lo->lo_refcnt))
+ rc = __blkdev_reread_part(bdev);
+ else
+ rc = blkdev_reread_part(bdev);
+ if (rc)
+ pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
+ __func__, lo->lo_number, lo->lo_file_name, rc);
+}
+
/*
* loop_change_fd switched the backing store of a loopback device to
* a new file. This is useful for operating system installers to free up
@@ -524,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
return 0;
out_putf:
@@ -725,6 +745,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
goto out_putf;
+ error = -ENOMEM;
+ lo->wq = alloc_workqueue("kloopd%d",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
+ lo->lo_number);
+ if (!lo->wq)
+ goto out_putf;
error = 0;
@@ -755,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
/* Grab the block_device to prevent its destruction after we
* put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -827,7 +853,7 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
- if (lo->lo_refcnt > 1) {
+ if (atomic_read(&lo->lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
mutex_unlock(&lo->lo_ctl_mutex);
return 0;
@@ -836,6 +862,9 @@ static int loop_clr_fd(struct loop_device *lo)
if (filp == NULL)
return -EINVAL;
+ /* freeze request queue during the transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+
spin_lock_irq(&lo->lo_lock);
lo->lo_state = Lo_rundown;
lo->lo_backing_file = NULL;
@@ -867,11 +896,15 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
+ blk_mq_unfreeze_queue(lo->lo_queue);
+
if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+ destroy_workqueue(lo->wq);
+ lo->wq = NULL;
mutex_unlock(&lo->lo_ctl_mutex);
/*
* Need not hold lo_ctl_mutex to fput backing file.
@@ -943,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+ loop_reread_partitions(lo, lo->lo_device);
}
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
@@ -1324,9 +1357,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
goto out;
}
- mutex_lock(&lo->lo_ctl_mutex);
- lo->lo_refcnt++;
- mutex_unlock(&lo->lo_ctl_mutex);
+ atomic_inc(&lo->lo_refcnt);
out:
mutex_unlock(&loop_index_mutex);
return err;
@@ -1337,11 +1368,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
struct loop_device *lo = disk->private_data;
int err;
- mutex_lock(&lo->lo_ctl_mutex);
-
- if (--lo->lo_refcnt)
- goto out;
+ if (atomic_dec_return(&lo->lo_refcnt))
+ return;
+ mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
/*
* In autoclear mode, stop the loop thread
@@ -1358,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
loop_flush(lo);
}
-out:
mutex_unlock(&lo->lo_ctl_mutex);
}
@@ -1425,9 +1454,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct loop_device *lo = cmd->rq->q->queuedata;
blk_mq_start_request(bd->rq);
+ if (lo->lo_state != Lo_bound)
+ return -EIO;
+
if (cmd->rq->cmd_flags & REQ_WRITE) {
struct loop_device *lo = cmd->rq->q->queuedata;
bool need_sched = true;
@@ -1441,9 +1474,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irq(&lo->lo_lock);
if (need_sched)
- queue_work(loop_wq, &lo->write_work);
+ queue_work(lo->wq, &lo->write_work);
} else {
- queue_work(loop_wq, &cmd->read_work);
+ queue_work(lo->wq, &cmd->read_work);
}
return BLK_MQ_RQ_QUEUE_OK;
@@ -1455,9 +1488,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
struct loop_device *lo = cmd->rq->q->queuedata;
int ret = -EIO;
- if (lo->lo_state != Lo_bound)
- goto failed;
-
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
goto failed;
@@ -1594,6 +1624,7 @@ static int loop_add(struct loop_device **l, int i)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
+ atomic_set(&lo->lo_refcnt, 0);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR;
@@ -1620,8 +1651,8 @@ out:
static void loop_remove(struct loop_device *lo)
{
- del_gendisk(lo->lo_disk);
blk_cleanup_queue(lo->lo_queue);
+ del_gendisk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
put_disk(lo->lo_disk);
kfree(lo);
@@ -1711,7 +1742,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&lo->lo_ctl_mutex);
break;
}
- if (lo->lo_refcnt > 0) {
+ if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
mutex_unlock(&lo->lo_ctl_mutex);
break;
@@ -1806,13 +1837,6 @@ static int __init loop_init(void)
goto misc_out;
}
- loop_wq = alloc_workqueue("kloopd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
- if (!loop_wq) {
- err = -ENOMEM;
- goto misc_out;
- }
-
blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
THIS_MODULE, loop_probe, NULL, NULL);
@@ -1850,8 +1874,6 @@ static void __exit loop_exit(void)
blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
unregister_blkdev(LOOP_MAJOR, "loop");
- destroy_workqueue(loop_wq);
-
misc_deregister(&loop_misc);
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f8323f..25e8997ed246 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -28,7 +28,7 @@ struct loop_func_table;
struct loop_device {
int lo_number;
- int lo_refcnt;
+ atomic_t lo_refcnt;
loff_t lo_offset;
loff_t lo_sizelimit;
int lo_flags;
@@ -54,6 +54,7 @@ struct loop_device {
gfp_t old_gfp_mask;
spinlock_t lo_lock;
+ struct workqueue_struct *wq;
struct list_head write_cmd_head;
struct work_struct write_work;
bool write_started;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3bd7ca9853a8..4a2ef09e6704 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
else
dev_warn(&dd->pdev->dev,
"%s: dd->queue is NULL\n", __func__);
- if (dd->port) {
- set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
- wake_up_interruptible(&dd->port->svc_wait);
- } else
- dev_warn(&dd->pdev->dev,
- "%s: dd->port is NULL\n", __func__);
return true; /* device removed */
}
@@ -269,8 +263,11 @@ static int mtip_hba_reset(struct driver_data *dd)
/* Flush */
readl(dd->mmio + HOST_CTL);
- /* Spin for up to 2 seconds, waiting for reset acknowledgement */
- timeout = jiffies + msecs_to_jiffies(2000);
+ /*
+ * Spin for up to 10 seconds waiting for reset acknowledgement. Spec
+ * is 1 sec but in LUN failure conditions, up to 10 secs are required
+ */
+ timeout = jiffies + msecs_to_jiffies(10000);
do {
mdelay(10);
if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
@@ -623,8 +620,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
- if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
- test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
+ if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
@@ -896,6 +892,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
/* Acknowledge the interrupt status on the port.*/
port_stat = readl(port->mmio + PORT_IRQ_STAT);
+ if (unlikely(port_stat == 0xFFFFFFFF)) {
+ mtip_check_surprise_removal(dd->pdev);
+ return IRQ_HANDLED;
+ }
writel(port_stat, port->mmio + PORT_IRQ_STAT);
/* Demux port status */
@@ -991,15 +991,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
reply = port->rxfis + RX_FIS_D2H_REG;
task_file_data = readl(port->mmio+PORT_TFDATA);
- if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
- clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
-
if ((task_file_data & 1))
return false;
if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
- set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
- set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
port->ic_pause_timer = jiffies;
return true;
} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1011,8 +1006,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
((fis->command == 0xFC) &&
(fis->features == 0x27 || fis->features == 0x72 ||
fis->features == 0x62 || fis->features == 0x26))) {
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
/* Com reset after secure erase or lowlevel format */
mtip_restart_port(port);
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
return false;
}
@@ -1112,9 +1109,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
int_cmd = mtip_get_int_command(dd);
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
- port->ic_pause_timer = 0;
- clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ if (fis->command == ATA_CMD_SEC_ERASE_PREP)
+ set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+
clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
if (atomic == GFP_KERNEL) {
@@ -1251,11 +1249,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
exec_ic_exit:
/* Clear the allocated and active bits for the internal command. */
mtip_put_int_command(dd, int_cmd);
+ clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
if (rv >= 0 && mtip_pause_ncq(port, fis)) {
/* NCQ paused */
return rv;
}
- clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
return rv;
@@ -2625,18 +2623,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
readl(dd->mmio + HOST_IRQ_STAT));
size += sprintf(&buf[size], "\n");
- size += sprintf(&buf[size], "L/ Allocated : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--) {
- if (sizeof(long) > sizeof(u32))
- group_allocated =
- dd->port->allocated[n/2] >> (32*(n&1));
- else
- group_allocated = dd->port->allocated[n];
- size += sprintf(&buf[size], "%08X ", group_allocated);
- }
- size += sprintf(&buf[size], "]\n");
-
size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
@@ -2780,48 +2766,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
debugfs_remove_recursive(dd->dfs_node);
}
-static int mtip_free_orphan(struct driver_data *dd)
-{
- struct kobject *kobj;
-
- if (dd->bdev) {
- if (dd->bdev->bd_holders >= 1)
- return -2;
-
- bdput(dd->bdev);
- dd->bdev = NULL;
- }
-
- mtip_hw_debugfs_exit(dd);
-
- spin_lock(&rssd_index_lock);
- ida_remove(&rssd_index_ida, dd->index);
- spin_unlock(&rssd_index_lock);
-
- if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
- test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
- put_disk(dd->disk);
- } else {
- if (dd->disk) {
- kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
- if (kobj) {
- mtip_hw_sysfs_exit(dd, kobj);
- kobject_put(kobj);
- }
- del_gendisk(dd->disk);
- dd->disk = NULL;
- }
- if (dd->queue) {
- dd->queue->queuedata = NULL;
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- dd->queue = NULL;
- }
- }
- kfree(dd);
- return 0;
-}
-
/*
* Perform any init/resume time hardware setup
*
@@ -2944,7 +2888,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
mtip_block_initialize(dd);
return 0;
}
- ssleep(10);
} while (time_before(jiffies, timeout));
/* Check for timeout */
@@ -2969,7 +2912,6 @@ static int mtip_service_thread(void *data)
unsigned long slot, slot_start, slot_wrap;
unsigned int num_cmd_slots = dd->slot_groups * 32;
struct mtip_port *port = dd->port;
- int ret;
while (1) {
if (kthread_should_stop() ||
@@ -2990,10 +2932,6 @@ static int mtip_service_thread(void *data)
test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
goto st_out;
- /* If I am an orphan, start self cleanup */
- if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
- break;
-
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag)))
goto st_out;
@@ -3047,26 +2985,6 @@ restart_eh:
}
}
- /* wait for pci remove to exit */
- while (1) {
- if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
- break;
- msleep_interruptible(1000);
- if (kthread_should_stop())
- goto st_out;
- }
-
- while (1) {
- ret = mtip_free_orphan(dd);
- if (!ret) {
- /* NOTE: All data structures are invalid, do not
- * access any here */
- return 0;
- }
- msleep_interruptible(1000);
- if (kthread_should_stop())
- goto st_out;
- }
st_out:
return 0;
}
@@ -3394,6 +3312,7 @@ static int mtip_hw_exit(struct driver_data *dd)
/* Release the IRQ. */
irq_set_affinity_hint(dd->pdev->irq, NULL);
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
+ msleep(1000);
/* Free dma regions */
mtip_dma_free(dd);
@@ -3699,6 +3618,26 @@ static const struct block_device_operations mtip_block_ops = {
.owner = THIS_MODULE
};
+static inline bool is_se_active(struct driver_data *dd)
+{
+ if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) {
+ if (dd->port->ic_pause_timer) {
+ unsigned long to = dd->port->ic_pause_timer +
+ msecs_to_jiffies(1000);
+ if (time_after(jiffies, to)) {
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT,
+ &dd->port->flags);
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
+ dd->port->ic_pause_timer = 0;
+ wake_up_interruptible(&dd->port->svc_wait);
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
/*
* Block layer make request function.
*
@@ -3716,6 +3655,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
unsigned int nents;
+ if (is_se_active(dd))
+ return -ENODATA;
+
if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag))) {
@@ -3900,7 +3842,8 @@ static int mtip_block_initialize(struct driver_data *dd)
dd->disk->driverfs_dev = &dd->pdev->dev;
dd->disk->major = dd->major;
- dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS;
+ dd->disk->first_minor = index * MTIP_MAX_MINORS;
+ dd->disk->minors = MTIP_MAX_MINORS;
dd->disk->fops = &mtip_block_ops;
dd->disk->private_data = dd;
dd->index = index;
@@ -4066,52 +4009,51 @@ static int mtip_block_remove(struct driver_data *dd)
{
struct kobject *kobj;
- if (!dd->sr) {
- mtip_hw_debugfs_exit(dd);
+ mtip_hw_debugfs_exit(dd);
- if (dd->mtip_svc_handler) {
- set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
- wake_up_interruptible(&dd->port->svc_wait);
- kthread_stop(dd->mtip_svc_handler);
- }
+ if (dd->mtip_svc_handler) {
+ set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
+ wake_up_interruptible(&dd->port->svc_wait);
+ kthread_stop(dd->mtip_svc_handler);
+ }
- /* Clean up the sysfs attributes, if created */
- if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
- kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
- if (kobj) {
- mtip_hw_sysfs_exit(dd, kobj);
- kobject_put(kobj);
- }
+ /* Clean up the sysfs attributes, if created */
+ if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
+ kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
+ if (kobj) {
+ mtip_hw_sysfs_exit(dd, kobj);
+ kobject_put(kobj);
}
+ }
+ if (!dd->sr)
mtip_standby_drive(dd);
-
- /*
- * Delete our gendisk structure. This also removes the device
- * from /dev
- */
- if (dd->bdev) {
- bdput(dd->bdev);
- dd->bdev = NULL;
- }
- if (dd->disk) {
- if (dd->disk->queue) {
- del_gendisk(dd->disk);
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- dd->queue = NULL;
- } else
- put_disk(dd->disk);
- }
- dd->disk = NULL;
-
- spin_lock(&rssd_index_lock);
- ida_remove(&rssd_index_ida, dd->index);
- spin_unlock(&rssd_index_lock);
- } else {
+ else
dev_info(&dd->pdev->dev, "device %s surprise removal\n",
dd->disk->disk_name);
+
+ /*
+ * Delete our gendisk structure. This also removes the device
+ * from /dev
+ */
+ if (dd->bdev) {
+ bdput(dd->bdev);
+ dd->bdev = NULL;
}
+ if (dd->disk) {
+ del_gendisk(dd->disk);
+ if (dd->disk->queue) {
+ blk_cleanup_queue(dd->queue);
+ blk_mq_free_tag_set(&dd->tags);
+ dd->queue = NULL;
+ }
+ put_disk(dd->disk);
+ }
+ dd->disk = NULL;
+
+ spin_lock(&rssd_index_lock);
+ ida_remove(&rssd_index_ida, dd->index);
+ spin_unlock(&rssd_index_lock);
/* De-initialize the protocol layer. */
mtip_hw_exit(dd);
@@ -4140,12 +4082,12 @@ static int mtip_block_shutdown(struct driver_data *dd)
dev_info(&dd->pdev->dev,
"Shutting down %s ...\n", dd->disk->disk_name);
+ del_gendisk(dd->disk);
if (dd->disk->queue) {
- del_gendisk(dd->disk);
blk_cleanup_queue(dd->queue);
blk_mq_free_tag_set(&dd->tags);
- } else
- put_disk(dd->disk);
+ }
+ put_disk(dd->disk);
dd->disk = NULL;
dd->queue = NULL;
}
@@ -4507,6 +4449,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
+ blk_mq_stop_hw_queues(dd->queue);
/* Clean up the block layer. */
mtip_block_remove(dd);
@@ -4524,10 +4467,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
list_del_init(&dd->remove_list);
spin_unlock_irqrestore(&dev_lock, flags);
- if (!dd->sr)
- kfree(dd);
- else
- set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
+ kfree(dd);
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index ba1b31ee22ec..3274784008eb 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -142,7 +142,6 @@ enum {
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
- MTIP_PF_SR_CLEANUP_BIT = 7,
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -150,7 +149,6 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
- MTIP_DDF_REMOVE_DONE_BIT = 4,
MTIP_DDF_CLEANUP_BIT = 5,
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
@@ -412,19 +410,13 @@ struct mtip_port {
* by the DMA when the driver issues internal commands.
*/
dma_addr_t sector_buffer_dma;
- /*
- * Bit significant, used to determine if a command slot has
- * been allocated. i.e. the slot is in use. Bits are cleared
- * when the command slot and all associated data structures
- * are no longer needed.
- */
+
u16 *log_buf;
dma_addr_t log_buf_dma;
u8 *smart_buf;
dma_addr_t smart_buf_dma;
- unsigned long allocated[SLOTBITS_IN_LONGS];
/*
* used to queue commands when an internal command is in progress
* or error handling is active
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 39e5f7fae3ef..0e385d8e9b86 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
int result, flags;
struct nbd_request request;
unsigned long size = blk_rq_bytes(req);
+ u32 type;
+
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ type = NBD_CMD_DISC;
+ else if (req->cmd_flags & REQ_DISCARD)
+ type = NBD_CMD_TRIM;
+ else if (req->cmd_flags & REQ_FLUSH)
+ type = NBD_CMD_FLUSH;
+ else if (rq_data_dir(req) == WRITE)
+ type = NBD_CMD_WRITE;
+ else
+ type = NBD_CMD_READ;
memset(&request, 0, sizeof(request));
request.magic = htonl(NBD_REQUEST_MAGIC);
- request.type = htonl(nbd_cmd(req));
-
- if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+ request.type = htonl(type);
+ if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
}
memcpy(request.handle, &req, sizeof(req));
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
- req, nbdcmd_to_ascii(nbd_cmd(req)),
+ req, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, 1, &request, sizeof(request),
- (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+ (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
return -EIO;
}
- if (nbd_cmd(req) == NBD_CMD_WRITE) {
+ if (type == NBD_CMD_WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
/*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
}
dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
- if (nbd_cmd(req) == NBD_CMD_READ) {
+ if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
if (req->cmd_type != REQ_TYPE_FS)
goto error_out;
- nbd_cmd(req) = NBD_CMD_READ;
- if (rq_data_dir(req) == WRITE) {
- if ((req->cmd_flags & REQ_DISCARD)) {
- WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
- nbd_cmd(req) = NBD_CMD_TRIM;
- } else
- nbd_cmd(req) = NBD_CMD_WRITE;
- if (nbd->flags & NBD_FLAG_READ_ONLY) {
- dev_err(disk_to_dev(nbd->disk),
- "Write on read-only\n");
- goto error_out;
- }
- }
-
- if (req->cmd_flags & REQ_FLUSH) {
- BUG_ON(unlikely(blk_rq_sectors(req)));
- nbd_cmd(req) = NBD_CMD_FLUSH;
+ if (rq_data_dir(req) == WRITE &&
+ (nbd->flags & NBD_FLAG_READ_ONLY)) {
+ dev_err(disk_to_dev(nbd->disk),
+ "Write on read-only\n");
+ goto error_out;
}
req->errors = 0;
@@ -592,8 +591,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
fsync_bdev(bdev);
mutex_lock(&nbd->tx_lock);
blk_rq_init(NULL, &sreq);
- sreq.cmd_type = REQ_TYPE_SPECIAL;
- nbd_cmd(&sreq) = NBD_CMD_DISC;
+ sreq.cmd_type = REQ_TYPE_DRV_PRIV;
/* Check again after getting mutex back. */
if (!nbd->sock)
@@ -713,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
bdev->bd_inode->i_size = 0;
set_capacity(nbd->disk, 0);
if (max_part > 0)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ blkdev_reread_part(bdev);
if (nbd->disconnect) /* user requested, ignore socket errors */
return 0;
return nbd->harderror;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 65cd61a4145e..6f9b7534928e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
cmd = container_of(entry, struct nullb_cmd, ll_list);
entry = entry->next;
end_cmd(cmd);
+
+ if (cmd->rq) {
+ struct request_queue *q = cmd->rq->q;
+
+ if (!q->mq_ops && blk_queue_stopped(q)) {
+ spin_lock(q->queue_lock);
+ if (blk_queue_stopped(q))
+ blk_start_queue(q);
+ spin_unlock(q->queue_lock);
+ }
+ }
} while (entry);
}
@@ -257,7 +268,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
if (llist_add(&cmd->ll_list, &cq->list)) {
ktime_t kt = ktime_set(0, completion_nsec);
- hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+ hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
}
put_cpu();
@@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req)
req->special = cmd;
return BLKPREP_OK;
}
+ blk_stop_queue(q);
return BLKPREP_DEFER;
}
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 85b8036deaa3..e5112714188f 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -29,6 +29,7 @@
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
+#include <linux/list_sort.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -80,6 +81,7 @@ static wait_queue_head_t nvme_kthread_wait;
static struct class *nvme_class;
static void nvme_reset_failed_dev(struct work_struct *ws);
+static int nvme_reset(struct nvme_dev *dev);
static int nvme_process_cq(struct nvme_queue *nvmeq);
struct async_cmd_info {
@@ -102,6 +104,7 @@ struct nvme_queue {
spinlock_t q_lock;
struct nvme_command *sq_cmds;
volatile struct nvme_completion *cqes;
+ struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
@@ -114,7 +117,6 @@ struct nvme_queue {
u8 cq_phase;
u8 cqe_seen;
struct async_cmd_info cmdinfo;
- struct blk_mq_hw_ctx *hctx;
};
/*
@@ -182,9 +184,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_dev *dev = data;
struct nvme_queue *nvmeq = dev->queues[0];
- WARN_ON(nvmeq->hctx);
- nvmeq->hctx = hctx;
+ WARN_ON(hctx_idx != 0);
+ WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
+ WARN_ON(nvmeq->tags);
+
hctx->driver_data = nvmeq;
+ nvmeq->tags = &dev->admin_tagset.tags[0];
return 0;
}
@@ -201,27 +206,16 @@ static int nvme_admin_init_request(void *data, struct request *req,
return 0;
}
-static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
- struct nvme_queue *nvmeq = hctx->driver_data;
-
- nvmeq->hctx = NULL;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[
- (hctx_idx % dev->queue_count) + 1];
-
- if (!nvmeq->hctx)
- nvmeq->hctx = hctx;
+ struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
- /* nvmeq queues are shared between namespaces. We assume here that
- * blk-mq map the tags so they match up with the nvme queue tags. */
- WARN_ON(nvmeq->hctx->tags != hctx->tags);
+ if (!nvmeq->tags)
+ nvmeq->tags = &dev->tagset.tags[hctx_idx];
+ WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
hctx->driver_data = nvmeq;
return 0;
}
@@ -307,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
++nvmeq->dev->event_limit;
- if (status == NVME_SC_SUCCESS)
- dev_warn(nvmeq->q_dmadev,
- "async event result %08x\n", result);
+ if (status != NVME_SC_SUCCESS)
+ return;
+
+ switch (result & 0xff07) {
+ case NVME_AER_NOTICE_NS_CHANGED:
+ dev_info(nvmeq->q_dmadev, "rescanning\n");
+ schedule_work(&nvmeq->dev->scan_work);
+ default:
+ dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+ }
}
static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -320,7 +321,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
u16 status = le16_to_cpup(&cqe->status) >> 1;
u32 result = le32_to_cpup(&cqe->result);
- blk_mq_free_hctx_request(nvmeq->hctx, req);
+ blk_mq_free_request(req);
dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
++nvmeq->dev->abort_limit;
@@ -333,14 +334,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx,
cmdinfo->result = le32_to_cpup(&cqe->result);
cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
- blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req);
+ blk_mq_free_request(cmdinfo->req);
}
static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
unsigned int tag)
{
- struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
- struct request *req = blk_mq_tag_to_rq(hctx->tags, tag);
+ struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
return blk_mq_rq_to_pdu(req);
}
@@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
(unsigned long) rq, gfp);
}
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
{
const int last_prp = dev->page_size / 8 - 1;
int i;
@@ -605,22 +605,30 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
}
- req->errors = nvme_error_status(status);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ req->errors = status;
+ } else {
+ req->errors = nvme_error_status(status);
+ }
} else
req->errors = 0;
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ u32 result = le32_to_cpup(&cqe->result);
+ req->special = (void *)(uintptr_t)result;
+ }
if (cmd_rq->aborted)
- dev_warn(&nvmeq->dev->pci_dev->dev,
+ dev_warn(nvmeq->dev->dev,
"completing aborted command with status:%04x\n",
status);
if (iod->nents) {
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
+ dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (blk_integrity_rq(req)) {
if (!rq_data_dir(req))
nvme_dif_remap(req, nvme_dif_complete);
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+ dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
}
}
@@ -630,8 +638,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
}
/* length is in bytes. gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
- gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+ int total_len, gfp_t gfp)
{
struct dma_pool *pool;
int length = total_len;
@@ -709,6 +717,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
return total_len;
}
+static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
+ struct nvme_iod *iod)
+{
+ struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+
+ memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
+ cmnd->rw.command_id = req->tag;
+ if (req->nr_phys_segments) {
+ cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ }
+
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
/*
* We reuse the small pool to allocate the 16-byte range here as it is not
* worth having a special pool for these or additional cases to handle freeing
@@ -807,11 +832,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
return 0;
}
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_queue *nvmeq = hctx->driver_data;
+ struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_iod *iod;
@@ -822,15 +851,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
* unless this namespace is formated such that the metadata can be
* stripped/generated by the controller with PRACT=1.
*/
- if (ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8)) {
+ if (ns && ns->ms && !blk_integrity_rq(req)) {
+ if (!(ns->pi_type && ns->ms == 8) &&
+ req->cmd_type != REQ_TYPE_DRV_PRIV) {
req->errors = -EFAULT;
blk_mq_complete_request(req);
return BLK_MQ_RQ_QUEUE_OK;
}
}
- iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
+ iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
if (!iod)
return BLK_MQ_RQ_QUEUE_BUSY;
@@ -841,8 +871,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
* as it is not worth having a special pool for these or
* additional cases to handle freeing the iod.
*/
- range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
- GFP_ATOMIC,
+ range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
&iod->first_dma);
if (!range)
goto retry_cmd;
@@ -860,9 +889,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
goto retry_cmd;
if (blk_rq_bytes(req) !=
- nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg,
- iod->nents, dma_dir);
+ nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
goto retry_cmd;
}
if (blk_integrity_rq(req)) {
@@ -884,7 +912,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_set_info(cmd, iod, req_completion);
spin_lock_irq(&nvmeq->q_lock);
- if (req->cmd_flags & REQ_DISCARD)
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ nvme_submit_priv(nvmeq, req, iod);
+ else if (req->cmd_flags & REQ_DISCARD)
nvme_submit_discard(nvmeq, ns, req, iod);
else if (req->cmd_flags & REQ_FLUSH)
nvme_submit_flush(nvmeq, ns, req->tag);
@@ -896,10 +926,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
error_cmd:
- nvme_free_iod(nvmeq->dev, iod);
+ nvme_free_iod(dev, iod);
return BLK_MQ_RQ_QUEUE_ERROR;
retry_cmd:
- nvme_free_iod(nvmeq->dev, iod);
+ nvme_free_iod(dev, iod);
return BLK_MQ_RQ_QUEUE_BUSY;
}
@@ -942,15 +972,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
return 1;
}
-/* Admin queue isn't initialized as a request queue. If at some point this
- * happens anyway, make sure to notify the user */
-static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
-}
-
static irqreturn_t nvme_irq(int irq, void *data)
{
irqreturn_t result;
@@ -972,46 +993,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-struct sync_cmd_info {
- struct task_struct *task;
- u32 result;
- int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct sync_cmd_info *cmdinfo = ctx;
- cmdinfo->result = le32_to_cpup(&cqe->result);
- cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
- wake_up_process(cmdinfo->task);
-}
-
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
* if the result is positive, it's an NVM Express status code
*/
-static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
- u32 *result, unsigned timeout)
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, void __user *ubuffer, unsigned bufflen,
+ u32 *result, unsigned timeout)
{
- struct sync_cmd_info cmdinfo;
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+ bool write = cmd->common.opcode & 1;
+ struct bio *bio = NULL;
+ struct request *req;
+ int ret;
- cmdinfo.task = current;
- cmdinfo.status = -EINTR;
+ req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
- cmd->common.command_id = req->tag;
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
+ req->cmd_flags |= REQ_FAILFAST_DRIVER;
+ req->__data_len = 0;
+ req->__sector = (sector_t) -1;
+ req->bio = req->biotail = NULL;
- nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
- set_current_state(TASK_UNINTERRUPTIBLE);
- nvme_submit_cmd(nvmeq, cmd);
- schedule();
+ req->cmd = (unsigned char *)cmd;
+ req->cmd_len = sizeof(struct nvme_command);
+ req->special = (void *)0;
+ if (buffer && bufflen) {
+ ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+ if (ret)
+ goto out;
+ } else if (ubuffer && bufflen) {
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+ if (ret)
+ goto out;
+ bio = req->bio;
+ }
+
+ blk_execute_rq(req->q, NULL, req, 0);
+ if (bio)
+ blk_rq_unmap_user(bio);
if (result)
- *result = cmdinfo.result;
- return cmdinfo.status;
+ *result = (u32)(uintptr_t)req->special;
+ ret = req->errors;
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
}
static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1033,7 +1069,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
c.common.opcode = nvme_admin_async_event;
c.common.command_id = req->tag;
- blk_mq_free_hctx_request(nvmeq->hctx, req);
+ blk_mq_free_request(req);
return __nvme_submit_cmd(nvmeq, &c);
}
@@ -1060,41 +1096,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
return nvme_submit_cmd(nvmeq, cmd);
}
-static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
- u32 *result, unsigned timeout)
-{
- int res;
- struct request *req;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
- if (IS_ERR(req))
- return PTR_ERR(req);
- res = nvme_submit_sync_cmd(req, cmd, result, timeout);
- blk_mq_free_request(req);
- return res;
-}
-
-int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
- u32 *result)
-{
- return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT);
-}
-
-int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
- struct nvme_command *cmd, u32 *result)
-{
- int res;
- struct request *req;
-
- req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT),
- false);
- if (IS_ERR(req))
- return PTR_ERR(req);
- res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT);
- blk_mq_free_request(req);
- return res;
-}
-
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
struct nvme_command c;
@@ -1103,7 +1104,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
c.delete_queue.opcode = opcode;
c.delete_queue.qid = cpu_to_le16(id);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1112,6 +1113,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
memset(&c, 0, sizeof(c));
c.create_cq.opcode = nvme_admin_create_cq;
c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
@@ -1120,7 +1125,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cq_flags = cpu_to_le16(flags);
c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1129,6 +1134,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
memset(&c, 0, sizeof(c));
c.create_sq.opcode = nvme_admin_create_sq;
c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
@@ -1137,7 +1146,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
c.create_sq.sq_flags = cpu_to_le16(flags);
c.create_sq.cqid = cpu_to_le16(qid);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1150,18 +1159,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
- dma_addr_t dma_addr)
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
{
- struct nvme_command c;
+ struct nvme_command c = {
+ .identify.opcode = nvme_admin_identify,
+ .identify.cns = cpu_to_le32(1),
+ };
+ int error;
- memset(&c, 0, sizeof(c));
- c.identify.opcode = nvme_admin_identify;
- c.identify.nsid = cpu_to_le32(nsid);
- c.identify.prp1 = cpu_to_le64(dma_addr);
- c.identify.cns = cpu_to_le32(cns);
+ *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ctrl));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+ struct nvme_id_ns **id)
+{
+ struct nvme_command c = {
+ .identify.opcode = nvme_admin_identify,
+ .identify.nsid = cpu_to_le32(nsid),
+ };
+ int error;
+
+ *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ns));
+ if (error)
+ kfree(*id);
+ return error;
}
int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1175,7 +1209,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
c.features.prp1 = cpu_to_le64(dma_addr);
c.features.fid = cpu_to_le32(fid);
- return nvme_submit_admin_cmd(dev, &c, result);
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+ result, 0);
}
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1189,7 +1224,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
c.features.fid = cpu_to_le32(fid);
c.features.dword11 = cpu_to_le32(dword11);
- return nvme_submit_admin_cmd(dev, &c, result);
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+ result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+ struct nvme_command c = {
+ .common.opcode = nvme_admin_get_log_page,
+ .common.nsid = cpu_to_le32(0xFFFFFFFF),
+ .common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+ NVME_LOG_SMART),
+ };
+ int error;
+
+ *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+ if (!*log)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+ sizeof(struct nvme_smart_log));
+ if (error)
+ kfree(*log);
+ return error;
}
/**
@@ -1214,8 +1272,7 @@ static void nvme_abort_req(struct request *req)
if (work_busy(&dev->reset_work))
goto out;
list_del_init(&dev->node);
- dev_warn(&dev->pci_dev->dev,
- "I/O %d QID %d timeout, reset controller\n",
+ dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
@@ -1254,8 +1311,7 @@ static void nvme_abort_req(struct request *req)
}
}
-static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
- struct request *req, void *data, bool reserved)
+static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
{
struct nvme_queue *nvmeq = data;
void *ctx;
@@ -1352,11 +1408,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_clear_queue(struct nvme_queue *nvmeq)
{
- struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-
spin_lock_irq(&nvmeq->q_lock);
- if (hctx && hctx->tags)
- blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
+ if (nvmeq->tags && *nvmeq->tags)
+ blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
}
@@ -1384,22 +1438,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth)
{
- struct device *dmadev = &dev->pci_dev->dev;
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
if (!nvmeq)
return NULL;
- nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth),
+ nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
&nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
- nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
goto free_cqdma;
- nvmeq->q_dmadev = dmadev;
+ nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
dev->instance, qid);
@@ -1409,13 +1462,16 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
nvmeq->q_depth = depth;
nvmeq->qid = qid;
- dev->queue_count++;
dev->queues[qid] = nvmeq;
+ /* make sure queue descriptor is set before queue count, for kthread */
+ mb();
+ dev->queue_count++;
+
return nvmeq;
free_cqdma:
- dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes,
+ dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
nvmeq->cq_dma_addr);
free_nvmeq:
kfree(nvmeq);
@@ -1487,7 +1543,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Device not ready; aborting %s\n", enabled ?
"initialisation" : "reset");
return -ENODEV;
@@ -1537,7 +1593,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Device shutdown incomplete; abort shutdown\n");
return -ENODEV;
}
@@ -1547,10 +1603,9 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
}
static struct blk_mq_ops nvme_mq_admin_ops = {
- .queue_rq = nvme_admin_queue_rq,
+ .queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
- .exit_hctx = nvme_exit_hctx,
.init_request = nvme_admin_init_request,
.timeout = nvme_timeout,
};
@@ -1559,7 +1614,6 @@ static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
- .exit_hctx = nvme_exit_hctx,
.init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -1580,7 +1634,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
dev->admin_tagset.reserved_tags = 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
- dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+ dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
dev->admin_tagset.driver_data = dev;
@@ -1613,14 +1667,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
if (page_shift < dev_page_min) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Minimum device page size (%u) too large for "
"host (%u)\n", 1 << dev_page_min,
1 << page_shift);
return -ENODEV;
}
if (page_shift > dev_page_max) {
- dev_info(&dev->pci_dev->dev,
+ dev_info(dev->dev,
"Device maximum page size (%u) smaller than "
"host (%u); enabling work-around\n",
1 << dev_page_max, 1 << page_shift);
@@ -1668,131 +1722,51 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
- unsigned long addr, unsigned length)
-{
- int i, err, count, nents, offset;
- struct scatterlist *sg;
- struct page **pages;
- struct nvme_iod *iod;
-
- if (addr & 3)
- return ERR_PTR(-EINVAL);
- if (!length || length > INT_MAX - PAGE_SIZE)
- return ERR_PTR(-EINVAL);
-
- offset = offset_in_page(addr);
- count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
- pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return ERR_PTR(-ENOMEM);
-
- err = get_user_pages_fast(addr, count, 1, pages);
- if (err < count) {
- count = err;
- err = -EFAULT;
- goto put_pages;
- }
-
- err = -ENOMEM;
- iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
- if (!iod)
- goto put_pages;
-
- sg = iod->sg;
- sg_init_table(sg, count);
- for (i = 0; i < count; i++) {
- sg_set_page(&sg[i], pages[i],
- min_t(unsigned, length, PAGE_SIZE - offset),
- offset);
- length -= (PAGE_SIZE - offset);
- offset = 0;
- }
- sg_mark_end(&sg[i - 1]);
- iod->nents = count;
-
- nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
- write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (!nents)
- goto free_iod;
-
- kfree(pages);
- return iod;
-
- free_iod:
- kfree(iod);
- put_pages:
- for (i = 0; i < count; i++)
- put_page(pages[i]);
- kfree(pages);
- return ERR_PTR(err);
-}
-
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
- struct nvme_iod *iod)
-{
- int i;
-
- dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
- write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- for (i = 0; i < iod->nents; i++)
- put_page(sg_page(&iod->sg[i]));
-}
-
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_dev *dev = ns->dev;
struct nvme_user_io io;
struct nvme_command c;
- unsigned length, meta_len, prp_len;
+ unsigned length, meta_len;
int status, write;
- struct nvme_iod *iod;
dma_addr_t meta_dma = 0;
void *meta = NULL;
+ void __user *metadata;
if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
- length = (io.nblocks + 1) << ns->lba_shift;
- meta_len = (io.nblocks + 1) * ns->ms;
-
- if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
- return -EINVAL;
- else if (meta_len && ns->ext) {
- length += meta_len;
- meta_len = 0;
- }
-
- write = io.opcode & 1;
switch (io.opcode) {
case nvme_cmd_write:
case nvme_cmd_read:
case nvme_cmd_compare:
- iod = nvme_map_user_pages(dev, write, io.addr, length);
break;
default:
return -EINVAL;
}
- if (IS_ERR(iod))
- return PTR_ERR(iod);
+ length = (io.nblocks + 1) << ns->lba_shift;
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = (void __user *)(unsigned long)io.metadata;
+ write = io.opcode & 1;
- prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
- if (length != prp_len) {
- status = -ENOMEM;
- goto unmap;
+ if (ns->ext) {
+ length += meta_len;
+ meta_len = 0;
}
if (meta_len) {
- meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+ if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+ return -EINVAL;
+
+ meta = dma_alloc_coherent(dev->dev, meta_len,
&meta_dma, GFP_KERNEL);
+
if (!meta) {
status = -ENOMEM;
goto unmap;
}
if (write) {
- if (copy_from_user(meta, (void __user *)io.metadata,
- meta_len)) {
+ if (copy_from_user(meta, metadata, meta_len)) {
status = -EFAULT;
goto unmap;
}
@@ -1810,20 +1784,17 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.reftag = cpu_to_le32(io.reftag);
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.rw.prp2 = cpu_to_le64(iod->first_dma);
c.rw.metadata = cpu_to_le64(meta_dma);
- status = nvme_submit_io_cmd(dev, ns, &c, NULL);
+
+ status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ (void __user *)io.addr, length, NULL, 0);
unmap:
- nvme_unmap_user_pages(dev, write, iod);
- nvme_free_iod(dev, iod);
if (meta) {
if (status == NVME_SC_SUCCESS && !write) {
- if (copy_to_user((void __user *)io.metadata, meta,
- meta_len))
+ if (copy_to_user(metadata, meta, meta_len))
status = -EFAULT;
}
- dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
+ dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
}
return status;
}
@@ -1833,9 +1804,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
- int status, length;
- struct nvme_iod *uninitialized_var(iod);
- unsigned timeout;
+ unsigned timeout = 0;
+ int status;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -1855,46 +1825,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
- length = cmd.data_len;
- if (cmd.data_len) {
- iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
- length);
- if (IS_ERR(iod))
- return PTR_ERR(iod);
- length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
- c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.common.prp2 = cpu_to_le64(iod->first_dma);
- }
-
- timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
- ADMIN_TIMEOUT;
-
- if (length != cmd.data_len)
- status = -ENOMEM;
- else if (ns) {
- struct request *req;
-
- req = blk_mq_alloc_request(ns->queue, WRITE,
- (GFP_KERNEL|__GFP_WAIT), false);
- if (IS_ERR(req))
- status = PTR_ERR(req);
- else {
- status = nvme_submit_sync_cmd(req, &c, &cmd.result,
- timeout);
- blk_mq_free_request(req);
- }
- } else
- status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
- if (cmd.data_len) {
- nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
- nvme_free_iod(dev, iod);
+ status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+ NULL, (void __user *)cmd.addr, cmd.data_len,
+ &cmd.result, timeout);
+ if (status >= 0) {
+ if (put_user(cmd.result, &ucmd->result))
+ return -EFAULT;
}
- if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
- sizeof(cmd.result)))
- status = -EFAULT;
-
return status;
}
@@ -1986,23 +1927,18 @@ static int nvme_revalidate_disk(struct gendisk *disk)
struct nvme_ns *ns = disk->private_data;
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
- dma_addr_t dma_addr;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;
- id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
- GFP_KERNEL);
- if (!id) {
- dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n",
- __func__);
- return 0;
+ if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+ dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
+ dev->instance, ns->ns_id);
+ return -ENODEV;
}
- if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
- dev_warn(&dev->pci_dev->dev,
- "identify failed ns:%d, setting capacity to 0\n",
- ns->ns_id);
- memset(id, 0, sizeof(*id));
+ if (id->ncap == 0) {
+ kfree(id);
+ return -ENODEV;
}
old_ms = ns->ms;
@@ -2036,7 +1972,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
!ns->ext)
nvme_init_integrity(ns);
- if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+ if (ns->ms && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2044,7 +1980,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (dev->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
- dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+ kfree(id);
return 0;
}
@@ -2071,7 +2007,7 @@ static int nvme_kthread(void *data)
if (work_busy(&dev->reset_work))
continue;
list_del_init(&dev->node);
- dev_warn(&dev->pci_dev->dev,
+ dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
readl(&dev->bar->csts));
dev->reset_workfn = nvme_reset_failed_dev;
@@ -2103,7 +2039,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
{
struct nvme_ns *ns;
struct gendisk *disk;
- int node = dev_to_node(&dev->pci_dev->dev);
+ int node = dev_to_node(dev->dev);
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -2151,11 +2087,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
* requires it.
*/
set_capacity(disk, 0);
- nvme_revalidate_disk(ns->disk);
+ if (nvme_revalidate_disk(ns->disk))
+ goto out_free_disk;
+
add_disk(ns->disk);
if (ns->ms)
revalidate_disk(ns->disk);
return;
+ out_free_disk:
+ kfree(disk);
+ list_del(&ns->list);
out_free_queue:
blk_cleanup_queue(ns->queue);
out_free_ns:
@@ -2186,8 +2127,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
if (status < 0)
return status;
if (status > 0) {
- dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
- status);
+ dev_err(dev->dev, "Could not set queue count (%d)\n", status);
return 0;
}
return min(result & 0xffff, result >> 16) + 1;
@@ -2201,7 +2141,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, i, vecs, nr_io_queues, size;
nr_io_queues = num_possible_cpus();
@@ -2273,6 +2213,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return result;
}
+/*
+ * Unlink @ns from the namespace list it is on, clear the gendisk's
+ * private_data pointer, then drop the disk reference and free @ns.
+ */
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+	struct gendisk *disk = ns->disk;
+
+	list_del(&ns->list);
+
+	/* private_data is cleared while holding dev_list_lock */
+	spin_lock(&dev_list_lock);
+	disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(disk);
+	kfree(ns);
+}
+
+/*
+ * list_sort() comparator that orders namespaces by ascending ns_id.
+ *
+ * @priv is unused. Returns a negative value when @a sorts before @b, zero
+ * when both carry the same ns_id and a positive value otherwise.
+ *
+ * The ids are compared explicitly instead of being subtracted: a difference
+ * that does not fit in an int would come back with the wrong sign.
+ */
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+	if (nsa->ns_id < nsb->ns_id)
+		return -1;
+	return nsa->ns_id > nsb->ns_id;
+}
+
+/*
+ * Look up the namespace with id @nsid on @dev's namespace list.
+ *
+ * The walk gives up at the first entry whose ns_id exceeds @nsid, so any
+ * matching entry placed after such an entry is not found.
+ * Returns the matching namespace, or NULL when none was reached.
+ */
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *cur;
+
+	list_for_each_entry(cur, &dev->namespaces, list) {
+		if (cur->ns_id > nsid)
+			return NULL;
+		if (cur->ns_id == nsid)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Report whether @dev is unable to service I/O: its register BAR is not
+ * mapped, the CSTS register has NVME_CSTS_CFS set, or fewer than two
+ * queues are online.
+ */
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+	/* the register read below is only valid with a mapped BAR */
+	if (!dev->bar)
+		return true;
+	if (readl(&dev->bar->csts) & NVME_CSTS_CFS)
+		return true;
+	return dev->online_queues < 2;
+}
+
+/*
+ * Tear down the block-layer side of @ns: unregister its gendisk when it is
+ * up and clean up its request queue. The nvme_ns itself is not freed here.
+ */
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+	bool kill = false;
+
+	/*
+	 * When the controller cannot do I/O and the queue is not already
+	 * dying, mark it dying ourselves before deleting the disk.
+	 */
+	if (nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue)) {
+		kill = true;
+		blk_set_queue_dying(ns->queue);
+	}
+
+	if (ns->disk->flags & GENHD_FL_UP) {
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
+		del_gendisk(ns->disk);
+	}
+
+	/* a queue that was dying before we got here is left alone */
+	if (!kill && blk_queue_dying(ns->queue))
+		return;
+
+	blk_mq_abort_requeue_list(ns->queue);
+	blk_cleanup_queue(ns->queue);
+}
+
+/*
+ * Bring @dev's namespace list in line with namespace ids 1..@nn.
+ *
+ * Ids already on the list are revalidated and dropped when revalidation
+ * reports failure; ids not on the list are handed to nvme_alloc_ns().
+ * Entries with an id above @nn are removed, and the list is finally sorted
+ * with ns_cmp.
+ */
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns, *next;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (!ns) {
+			nvme_alloc_ns(dev, i);
+			continue;
+		}
+		if (!revalidate_disk(ns->disk))
+			continue;
+
+		nvme_ns_remove(ns);
+		nvme_free_namespace(ns);
+	}
+
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		if (ns->ns_id <= nn)
+			continue;
+
+		nvme_ns_remove(ns);
+		nvme_free_namespace(ns);
+	}
+
+	list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+/*
+ * Work handler for dev->scan_work: fetch the controller identify data and
+ * rescan namespaces up to the namespace count it reports. Does nothing when
+ * the tag set has no tags or nvme_identify_ctrl() fails.
+ */
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_id_ctrl *id;
+	struct nvme_dev *dev;
+
+	dev = container_of(work, struct nvme_dev, scan_work);
+	if (!dev->tagset.tags || nvme_identify_ctrl(dev, &id))
+		return;
+
+	nvme_scan_namespaces(dev, le32_to_cpup(&id->nn));
+	kfree(id);
+}
+
/*
* Return: error value if an error occurred setting up the queues or calling
* Identify Device. 0 if these succeeded, even if adding some of the
@@ -2281,26 +2314,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
int res;
- unsigned nn, i;
+ unsigned nn;
struct nvme_id_ctrl *ctrl;
- void *mem;
- dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
- mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
- if (!mem)
- return -ENOMEM;
-
- res = nvme_identify(dev, 0, 1, dma_addr);
+ res = nvme_identify_ctrl(dev, &ctrl);
if (res) {
- dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
- dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+ dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
return -EIO;
}
- ctrl = mem;
nn = le32_to_cpup(&ctrl->nn);
dev->oncs = le16_to_cpup(&ctrl->oncs);
dev->abort_limit = ctrl->acl + 1;
@@ -2322,12 +2347,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
} else
dev->max_hw_sectors = max_hw_sectors;
}
- dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+ kfree(ctrl);
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+ dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = nvme_cmd_size(dev);
@@ -2337,9 +2362,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
- for (i = 1; i <= nn; i++)
- nvme_alloc_ns(dev, i);
-
+ schedule_work(&dev->scan_work);
return 0;
}
@@ -2347,7 +2370,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
{
u64 cap;
int bars, result = -ENOMEM;
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_enable_device_mem(pdev))
return result;
@@ -2361,8 +2384,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_selected_regions(pdev, bars, "nvme"))
goto disable_pci;
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+ if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
+ dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
goto disable;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
@@ -2403,19 +2426,21 @@ static int nvme_dev_map(struct nvme_dev *dev)
static void nvme_dev_unmap(struct nvme_dev *dev)
{
- if (dev->pci_dev->msi_enabled)
- pci_disable_msi(dev->pci_dev);
- else if (dev->pci_dev->msix_enabled)
- pci_disable_msix(dev->pci_dev);
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pdev->msi_enabled)
+ pci_disable_msi(pdev);
+ else if (pdev->msix_enabled)
+ pci_disable_msix(pdev);
if (dev->bar) {
iounmap(dev->bar);
dev->bar = NULL;
- pci_release_regions(dev->pci_dev);
+ pci_release_regions(pdev);
}
- if (pci_is_enabled(dev->pci_dev))
- pci_disable_device(dev->pci_dev);
+ if (pci_is_enabled(pdev))
+ pci_disable_device(pdev);
}
struct nvme_delq_ctx {
@@ -2534,7 +2559,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
&worker, "nvme%d", dev->instance);
if (IS_ERR(kworker_task)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Failed to create queue del task\n");
for (i = dev->queue_count - 1; i > 0; i--)
nvme_disable_queue(dev, i);
@@ -2585,9 +2610,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev)
list_for_each_entry(ns, &dev->namespaces, list) {
blk_mq_freeze_queue_start(ns->queue);
- spin_lock(ns->queue->queue_lock);
+ spin_lock_irq(ns->queue->queue_lock);
queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
- spin_unlock(ns->queue->queue_lock);
+ spin_unlock_irq(ns->queue->queue_lock);
blk_mq_cancel_requeue_work(ns->queue);
blk_mq_stop_hw_queues(ns->queue);
@@ -2637,29 +2662,19 @@ static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns;
- list_for_each_entry(ns, &dev->namespaces, list) {
- if (ns->disk->flags & GENHD_FL_UP) {
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
- del_gendisk(ns->disk);
- }
- if (!blk_queue_dying(ns->queue)) {
- blk_mq_abort_requeue_list(ns->queue);
- blk_cleanup_queue(ns->queue);
- }
- }
+ list_for_each_entry(ns, &dev->namespaces, list)
+ nvme_ns_remove(ns);
}
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
- struct device *dmadev = &dev->pci_dev->dev;
- dev->prp_page_pool = dma_pool_create("prp list page", dmadev,
+ dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
PAGE_SIZE, PAGE_SIZE, 0);
if (!dev->prp_page_pool)
return -ENOMEM;
/* Optimisation for I/Os between 4k and 128k */
- dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
+ dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
256, 256, 0);
if (!dev->prp_small_pool) {
dma_pool_destroy(dev->prp_page_pool);
@@ -2707,23 +2722,15 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
- list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- list_del(&ns->list);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- put_disk(ns->disk);
- kfree(ns);
- }
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+ nvme_free_namespace(ns);
}
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
- pci_dev_put(dev->pci_dev);
+ put_device(dev->dev);
put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
@@ -2779,6 +2786,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -ENOTTY;
ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
return nvme_user_cmd(dev, ns, (void __user *)arg);
+ case NVME_IOCTL_RESET:
+ dev_warn(dev->dev, "resetting controller\n");
+ return nvme_reset(dev);
default:
return -ENOTTY;
}
@@ -2800,11 +2810,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev)
for (i = 0; i < dev->online_queues; i++) {
nvmeq = dev->queues[i];
- if (!nvmeq->hctx)
+ if (!nvmeq->tags || !(*nvmeq->tags))
continue;
irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- nvmeq->hctx->cpumask);
+ blk_mq_tags_cpumask(*nvmeq->tags));
}
}
@@ -2867,7 +2877,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
static int nvme_remove_dead_ctrl(void *arg)
{
struct nvme_dev *dev = (struct nvme_dev *)arg;
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
@@ -2897,6 +2907,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
spin_unlock(&dev_list_lock);
} else {
nvme_unfreeze_queues(dev);
+ schedule_work(&dev->scan_work);
nvme_set_irq_hints(dev);
}
return 0;
@@ -2906,11 +2917,11 @@ static void nvme_dev_reset(struct nvme_dev *dev)
{
nvme_dev_shutdown(dev);
if (nvme_dev_resume(dev)) {
- dev_warn(&dev->pci_dev->dev, "Device failed to resume\n");
+ dev_warn(dev->dev, "Device failed to resume\n");
kref_get(&dev->kref);
if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
dev->instance))) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Failed to start controller remove task\n");
kref_put(&dev->kref, nvme_free_dev);
}
@@ -2929,6 +2940,44 @@ static void nvme_reset_workfn(struct work_struct *work)
dev->reset_workfn(work);
}
+/*
+ * Queue a controller reset and wait for it to finish.
+ *
+ * Returns -ENODEV when the admin queue is missing or dying, -EBUSY when
+ * reset work is already pending, and 0 once the reset work queued here has
+ * been flushed.
+ */
+static int nvme_reset(struct nvme_dev *dev)
+{
+	bool queued = false;
+
+	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+		return -ENODEV;
+
+	/* the pending check and the queueing both happen under dev_list_lock */
+	spin_lock(&dev_list_lock);
+	if (!work_pending(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+		queued = true;
+	}
+	spin_unlock(&dev_list_lock);
+
+	if (!queued)
+		return -EBUSY;
+
+	flush_work(&dev->reset_work);
+	return 0;
+}
+
+/*
+ * sysfs store handler behind the write-only reset_controller attribute.
+ * The written data is ignored; the write triggers nvme_reset() on the
+ * nvme_dev stored as the device's drvdata.
+ *
+ * Returns the negative error from nvme_reset(), otherwise @count.
+ */
+static ssize_t nvme_sysfs_reset(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
+	int err = nvme_reset(ndev);
+
+	if (err >= 0)
+		return count;
+	return err;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
@@ -2954,7 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&dev->namespaces);
dev->reset_workfn = nvme_reset_failed_dev;
INIT_WORK(&dev->reset_work, nvme_reset_workfn);
- dev->pci_dev = pci_dev_get(pdev);
+ dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
if (result)
@@ -2973,18 +3022,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto release_pools;
}
get_device(dev->device);
+ dev_set_drvdata(dev->device, dev);
+
+ result = device_create_file(dev->device, &dev_attr_reset_controller);
+ if (result)
+ goto put_dev;
INIT_LIST_HEAD(&dev->node);
+ INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->probe_work, nvme_async_probe);
schedule_work(&dev->probe_work);
return 0;
+ put_dev:
+ device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+ put_device(dev->device);
release_pools:
nvme_release_prp_pools(dev);
release:
nvme_release_instance(dev);
put_pci:
- pci_dev_put(dev->pci_dev);
+ put_device(dev->dev);
free:
kfree(dev->queues);
kfree(dev->entry);
@@ -3009,10 +3067,12 @@ static void nvme_async_probe(struct work_struct *work)
nvme_set_irq_hints(dev);
return;
reset:
+ spin_lock(&dev_list_lock);
if (!work_busy(&dev->reset_work)) {
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
}
+ spin_unlock(&dev_list_lock);
}
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -3042,6 +3102,8 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
flush_work(&dev->probe_work);
flush_work(&dev->reset_work);
+ flush_work(&dev->scan_work);
+ device_remove_file(dev->device, &dev_attr_reset_controller);
nvme_dev_shutdown(dev);
nvme_dev_remove(dev);
nvme_dev_remove_admin(dev);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 6b736b00f63e..e5a63f06fb0f 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -41,15 +41,13 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <asm/unaligned.h>
#include <scsi/sg.h>
#include <scsi/scsi.h>
static int sg_version_num = 30534; /* 2 digits for each component */
-#define SNTI_TRANSLATION_SUCCESS 0
-#define SNTI_INTERNAL_ERROR 1
-
/* VPD Page Codes */
#define VPD_SUPPORTED_PAGES 0x00
#define VPD_SERIAL_NUMBER 0x80
@@ -58,49 +56,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define VPD_BLOCK_LIMITS 0xB0
#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
-/* CDB offsets */
-#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6
-#define REPORT_LUNS_SR_OFFSET 2
-#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10
-#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4
-#define REQUEST_SENSE_DESC_OFFSET 1
-#define REQUEST_SENSE_DESC_MASK 0x01
-#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1
-#define INQUIRY_EVPD_BYTE_OFFSET 1
-#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2
-#define INQUIRY_EVPD_BIT_MASK 1
-#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3
-#define START_STOP_UNIT_CDB_IMMED_OFFSET 1
-#define START_STOP_UNIT_CDB_IMMED_MASK 0x1
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF
-#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4
-#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0
-#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4
-#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4
-#define START_STOP_UNIT_CDB_START_OFFSET 4
-#define START_STOP_UNIT_CDB_START_MASK 0x1
-#define WRITE_BUFFER_CDB_MODE_OFFSET 1
-#define WRITE_BUFFER_CDB_MODE_MASK 0x1F
-#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2
-#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3
-#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6
-#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1
-#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20
-#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1
-#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10
+/* format unit parameter list offsets */
#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
#define FORMAT_UNIT_PROT_INT_OFFSET 3
#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
-#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET 7
/* Misc. defines */
-#define NIBBLE_SHIFT 4
#define FIXED_SENSE_DATA 0x70
#define DESC_FORMAT_SENSE_DATA 0x72
#define FIXED_SENSE_DATA_ADD_LENGTH 10
@@ -144,27 +107,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
#define RESERVED_FIELD 0
-/* SCSI READ/WRITE Defines */
-#define IO_CDB_WP_MASK 0xE0
-#define IO_CDB_WP_SHIFT 5
-#define IO_CDB_FUA_MASK 0x8
-#define IO_6_CDB_LBA_OFFSET 0
-#define IO_6_CDB_LBA_MASK 0x001FFFFF
-#define IO_6_CDB_TX_LEN_OFFSET 4
-#define IO_6_DEFAULT_TX_LEN 256
-#define IO_10_CDB_LBA_OFFSET 2
-#define IO_10_CDB_TX_LEN_OFFSET 7
-#define IO_10_CDB_WP_OFFSET 1
-#define IO_10_CDB_FUA_OFFSET 1
-#define IO_12_CDB_LBA_OFFSET 2
-#define IO_12_CDB_TX_LEN_OFFSET 6
-#define IO_12_CDB_WP_OFFSET 1
-#define IO_12_CDB_FUA_OFFSET 1
-#define IO_16_CDB_FUA_OFFSET 1
-#define IO_16_CDB_WP_OFFSET 1
-#define IO_16_CDB_LBA_OFFSET 2
-#define IO_16_CDB_TX_LEN_OFFSET 10
-
/* Mode Sense/Select defines */
#define MODE_PAGE_INFO_EXCEP 0x1C
#define MODE_PAGE_CACHING 0x08
@@ -179,23 +121,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define MODE_PAGE_INF_EXC_LEN 0x0C
#define MODE_PAGE_ALL_LEN 0x54
#define MODE_SENSE6_MPH_SIZE 4
-#define MODE_SENSE6_ALLOC_LEN_OFFSET 4
-#define MODE_SENSE_PAGE_CONTROL_OFFSET 2
#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
#define MODE_SENSE_PAGE_CODE_OFFSET 2
#define MODE_SENSE_PAGE_CODE_MASK 0x3F
-#define MODE_SENSE_LLBAA_OFFSET 1
#define MODE_SENSE_LLBAA_MASK 0x10
#define MODE_SENSE_LLBAA_SHIFT 4
-#define MODE_SENSE_DBD_OFFSET 1
#define MODE_SENSE_DBD_MASK 8
#define MODE_SENSE_DBD_SHIFT 3
#define MODE_SENSE10_MPH_SIZE 8
-#define MODE_SENSE10_ALLOC_LEN_OFFSET 7
-#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1
-#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1
-#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4
-#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7
#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
#define MODE_SELECT_6_BD_OFFSET 3
@@ -221,14 +154,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
-#define LOG_SENSE_CDB_SP_OFFSET 1
#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
-#define LOG_SENSE_CDB_PC_OFFSET 2
#define LOG_SENSE_CDB_PC_MASK 0xC0
#define LOG_SENSE_CDB_PC_SHIFT 6
#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
-#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7
#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
#define REMAINING_TEMP_PAGE_LENGTH 0xC
@@ -278,77 +208,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
#define SCSI_ASCQ_INVALID_LUN_ID 0x09
-/**
- * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to
- * enable DPOFUA support type 0x10 value.
- */
-#define DEVICE_SPECIFIC_PARAMETER 0
-#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR)
-
-/* MACROs to extract information from CDBs */
-
-#define GET_OPCODE(cdb) cdb[0]
-
-#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0)
-
-#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0))
-
-#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \
-(cdb[index + 1] << 8) | \
-(cdb[index + 2] << 0))
-
-#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \
-(cdb[index + 1] << 16) | \
-(cdb[index + 2] << 8) | \
-(cdb[index + 3] << 0))
-
-#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \
-(((u64)cdb[index + 1]) << 48) | \
-(((u64)cdb[index + 2]) << 40) | \
-(((u64)cdb[index + 3]) << 32) | \
-(((u64)cdb[index + 4]) << 24) | \
-(((u64)cdb[index + 5]) << 16) | \
-(((u64)cdb[index + 6]) << 8) | \
-(((u64)cdb[index + 7]) << 0))
-
-/* Inquiry Helper Macros */
-#define GET_INQ_EVPD_BIT(cdb) \
-((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \
-INQUIRY_EVPD_BIT_MASK) ? 1 : 0)
-
-#define GET_INQ_PAGE_CODE(cdb) \
-(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET))
-
-#define GET_INQ_ALLOC_LENGTH(cdb) \
-(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET))
-
-/* Report LUNs Helper Macros */
-#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \
-(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Read Capacity Helper Macros */
-#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \
-(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET))
-
-#define IS_READ_CAP_16(cdb) \
-((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
-
-/* Request Sense Helper Macros */
-#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \
-(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Mode Sense Helper Macros */
-#define GET_MODE_SENSE_DBD(cdb) \
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \
-MODE_SENSE_DBD_SHIFT)
-
-#define GET_MODE_SENSE_LLBAA(cdb) \
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \
-MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT)
-
-#define GET_MODE_SENSE_MPH_SIZE(cdb10) \
-(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE)
-
+/*
+ * Read a 24-bit big-endian value from the three bytes starting at @buf.
+ *
+ * Only buf[0], buf[1] and buf[2] are accessed. The variant in
+ * drivers/usb/gadget/function/storage_common.h performs a 32-bit load from
+ * buf - 1 and masks off the top byte, which touches a byte in front of the
+ * caller's pointer; assembling the value bytewise avoids that.
+ */
+static inline u32 get_unaligned_be24(u8 *buf)
+{
+	return (u32)buf[0] << 16 | (u32)buf[1] << 8 | (u32)buf[2];
+}
/* Struct to gather data that needs to be extracted from a SCSI CDB.
Not conforming to any particular CDB variant, but compatible with all. */
@@ -369,8 +233,6 @@ struct nvme_trans_io_cdb {
static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
unsigned long n)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- unsigned long not_copied;
int i;
void *index = from;
size_t remaining = n;
@@ -380,29 +242,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
struct sg_iovec sgl;
for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
+ if (copy_from_user(&sgl, hdr->dxferp +
i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
+ sizeof(struct sg_iovec)))
return -EFAULT;
xfer_len = min(remaining, sgl.iov_len);
- not_copied = copy_to_user(sgl.iov_base, index,
- xfer_len);
- if (not_copied) {
- res = -EFAULT;
- break;
- }
+ if (copy_to_user(sgl.iov_base, index, xfer_len))
+ return -EFAULT;
+
index += xfer_len;
remaining -= xfer_len;
if (remaining == 0)
break;
}
- return res;
+ return 0;
}
- not_copied = copy_to_user(hdr->dxferp, from, n);
- if (not_copied)
- res = -EFAULT;
- return res;
+
+ if (copy_to_user(hdr->dxferp, from, n))
+ return -EFAULT;
+ return 0;
}
/* Copy data from userspace memory */
@@ -410,8 +268,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
unsigned long n)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- unsigned long not_copied;
int i;
void *index = to;
size_t remaining = n;
@@ -421,30 +277,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
struct sg_iovec sgl;
for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
+ if (copy_from_user(&sgl, hdr->dxferp +
i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
+ sizeof(struct sg_iovec)))
return -EFAULT;
xfer_len = min(remaining, sgl.iov_len);
- not_copied = copy_from_user(index, sgl.iov_base,
- xfer_len);
- if (not_copied) {
- res = -EFAULT;
- break;
- }
+ if (copy_from_user(index, sgl.iov_base, xfer_len))
+ return -EFAULT;
index += xfer_len;
remaining -= xfer_len;
if (remaining == 0)
break;
}
- return res;
+ return 0;
}
- not_copied = copy_from_user(to, hdr->dxferp, n);
- if (not_copied)
- res = -EFAULT;
- return res;
+ if (copy_from_user(to, hdr->dxferp, n))
+ return -EFAULT;
+ return 0;
}
/* Status/Sense Buffer Writeback */
@@ -452,7 +302,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
u8 asc, u8 ascq)
{
- int res = SNTI_TRANSLATION_SUCCESS;
u8 xfer_len;
u8 resp[DESC_FMT_SENSE_DATA_SIZE];
@@ -477,25 +326,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
hdr->sb_len_wr = xfer_len;
if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
- res = -EFAULT;
+ return -EFAULT;
}
- return res;
+ return 0;
}
+/*
+ * Take a status code from a lowlevel routine, and if it was a positive NVMe
+ * error code update the sense data based on it. In either case the passed
+ * in value is returned again, unless an -EFAULT from copy_to_user overrides
+ * it.
+ */
static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
{
u8 status, sense_key, asc, ascq;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
/* For non-nvme (Linux) errors, simply return the error code */
if (nvme_sc < 0)
return nvme_sc;
/* Mask DNR, More, and reserved fields */
- nvme_sc &= 0x7FF;
-
- switch (nvme_sc) {
+ switch (nvme_sc & 0x7FF) {
/* Generic Command Status */
case NVME_SC_SUCCESS:
status = SAM_STAT_GOOD;
@@ -662,8 +515,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
}
res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-
- return res;
+ return res ? res : nvme_sc;
}
/* INQUIRY Helper Functions */
@@ -673,10 +525,8 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
int xfer_len;
u8 resp_data_format = 0x02;
@@ -684,31 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
u8 cmdque = 0x01 << 1;
u8 fw_offset = sizeof(dev->firmware_rev);
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
-
/* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
- /*
- * If nvme_sc was -ve, res will be -ve here.
- * If nvme_sc was +ve, the status would bace been translated, and res
- * can only be 0 or -ve.
- * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc
- * - If -ve, return because its a Linux error.
- */
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ns = mem;
- (id_ns->dps) ? (protect = 0x01) : (protect = 0);
+ return res;
+
+ if (id_ns->dps)
+ protect = 0x01;
+ else
+ protect = 0;
+ kfree(id_ns);
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
inq_response[2] = VERSION_SPC_4;
@@ -725,20 +561,13 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -752,9 +581,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
inq_response[9] = INQ_BDEV_LIMITS_PAGE;
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
@@ -762,7 +589,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- int res = SNTI_TRANSLATION_SUCCESS;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -771,53 +597,42 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *inq_response, int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
int xfer_len;
__be32 tmp_id = cpu_to_be32(ns->ns_id);
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
-
memset(inq_response, 0, alloc_len);
inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */
if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
- struct nvme_id_ns *id_ns = mem;
- void *eui = id_ns->eui64;
- int len = sizeof(id_ns->eui64);
+ struct nvme_id_ns *id_ns;
+ void *eui;
+ int len;
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
+ return res;
+ eui = id_ns->eui64;
+ len = sizeof(id_ns->eui64);
if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
if (bitmap_empty(eui, len * 8)) {
eui = id_ns->nguid;
len = sizeof(id_ns->nguid);
}
}
- if (bitmap_empty(eui, len * 8))
+ if (bitmap_empty(eui, len * 8)) {
+ kfree(id_ns);
goto scsi_string;
+ }
inq_response[3] = 4 + len; /* Page Length */
/* Designation Descriptor start */
@@ -826,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
inq_response[6] = 0x00; /* Rsvd */
inq_response[7] = len; /* Designator Length */
memcpy(&inq_response[8], eui, len);
+ kfree(id_ns);
} else {
scsi_string:
if (alloc_len < 72) {
- res = nvme_trans_completion(hdr,
+ return nvme_trans_completion(hdr,
SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_free;
}
inq_response[3] = 0x48; /* Page Length */
/* Designation Descriptor start */
@@ -842,30 +657,22 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
inq_response[6] = 0x00; /* Rsvd */
inq_response[7] = 0x44; /* Designator Length */
- sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+ sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
memcpy(&inq_response[12], dev->model, sizeof(dev->model));
sprintf(&inq_response[52], "%04x", tmp_id);
memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
}
xfer_len = alloc_len;
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
u8 *inq_response;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
struct nvme_id_ns *id_ns;
int xfer_len;
@@ -878,45 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 luiclr = 0x01;
inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ if (inq_response == NULL)
+ return -ENOMEM;
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ns = mem;
- spt = spt_lut[(id_ns->dpc) & 0x07] << 3;
- (id_ns->dps) ? (protect = 0x01) : (protect = 0);
+ goto out_free_inq;
+
+ spt = spt_lut[id_ns->dpc & 0x07] << 3;
+ if (id_ns->dps)
+ protect = 0x01;
+ else
+ protect = 0;
+ kfree(id_ns);
+
grd_chk = protect << 2;
app_chk = protect << 1;
ref_chk = protect;
- /* nvme controller identify */
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ctrl = mem;
+ goto out_free_inq;
+
v_sup = id_ctrl->vwc;
+ kfree(id_ctrl);
memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
@@ -932,19 +726,16 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
+ out_free_inq:
kfree(inq_response);
- out_mem:
return res;
}
static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *inq_response, int alloc_len)
{
- __be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
+ __be32 max_sectors = cpu_to_be32(
+ nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
__be32 discard_desc_count = cpu_to_be32(0x100);
@@ -964,7 +755,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
u8 *inq_response;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
@@ -993,7 +784,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
@@ -1021,47 +812,30 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
struct sg_io_hdr *hdr, int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
- struct nvme_command c;
struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
- dma_addr_t dma_addr;
- void *mem;
u8 temp_c;
u16 temp_k;
log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
+ if (log_response == NULL)
+ return -ENOMEM;
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_smart_log),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ res = nvme_get_log_page(dev, &smart_log);
+ if (res < 0)
+ goto out_free_response;
- /* Get SMART Log Page */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_get_log_page;
- c.common.nsid = cpu_to_le32(0xFFFFFFFF);
- c.common.prp1 = cpu_to_le64(dma_addr);
- c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
- BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
- res = nvme_submit_admin_cmd(dev, &c, NULL);
if (res != NVME_SC_SUCCESS) {
temp_c = LOG_TEMP_UNKNOWN;
} else {
- smart_log = mem;
temp_k = (smart_log->temperature[1] << 8) +
(smart_log->temperature[0]);
temp_c = temp_k - KELVIN_TEMP_FACTOR;
}
+ kfree(smart_log);
log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
/* Subpage=0x00, Page Length MSB=0 */
@@ -1077,59 +851,39 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
- mem, dma_addr);
- out_dma:
+ out_free_response:
kfree(log_response);
- out_mem:
return res;
}
static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
- struct nvme_command c;
struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
- dma_addr_t dma_addr;
- void *mem;
u32 feature_resp;
u8 temp_c_cur, temp_c_thresh;
u16 temp_k;
log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
+ if (log_response == NULL)
+ return -ENOMEM;
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_smart_log),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ res = nvme_get_log_page(dev, &smart_log);
+ if (res < 0)
+ goto out_free_response;
- /* Get SMART Log Page */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_get_log_page;
- c.common.nsid = cpu_to_le32(0xFFFFFFFF);
- c.common.prp1 = cpu_to_le64(dma_addr);
- c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
- BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
- res = nvme_submit_admin_cmd(dev, &c, NULL);
if (res != NVME_SC_SUCCESS) {
temp_c_cur = LOG_TEMP_UNKNOWN;
} else {
- smart_log = mem;
temp_k = (smart_log->temperature[1] << 8) +
(smart_log->temperature[0]);
temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
}
+ kfree(smart_log);
/* Get Features for Temp Threshold */
res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
@@ -1158,11 +912,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
- mem, dma_addr);
- out_dma:
+ out_free_response:
kfree(log_response);
- out_mem:
return res;
}
@@ -1173,59 +924,45 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
{
/* Quick check to make sure I don't stomp on my own memory... */
if ((cdb10 && len < 8) || (!cdb10 && len < 4))
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
if (cdb10) {
resp[0] = (mode_data_length & 0xFF00) >> 8;
resp[1] = (mode_data_length & 0x00FF);
- /* resp[2] and [3] are zero */
+ resp[3] = 0x10 /* DPOFUA */;
resp[4] = llbaa;
resp[5] = RESERVED_FIELD;
resp[6] = (blk_desc_len & 0xFF00) >> 8;
resp[7] = (blk_desc_len & 0x00FF);
} else {
resp[0] = (mode_data_length & 0x00FF);
- /* resp[1] and [2] are zero */
+ resp[2] = 0x10 /* DPOFUA */;
resp[3] = (blk_desc_len & 0x00FF);
}
- return SNTI_TRANSLATION_SUCCESS;
+ return 0;
}
static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *resp, int len, u8 llbaa)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 flbas;
u32 lba_length;
if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
- return SNTI_INTERNAL_ERROR;
-
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
+ return -EINVAL;
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
+
flbas = (id_ns->flbas) & 0x0F;
lba_length = (1 << (id_ns->lbaf[flbas].ds));
@@ -1245,10 +982,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
memcpy(&resp[12], &tmp_len, sizeof(u32));
}
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ kfree(id_ns);
return res;
}
@@ -1257,7 +991,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
int len)
{
if (len < MODE_PAGE_CONTROL_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_CONTROL;
resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
@@ -1271,78 +1005,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
resp[9] = 0xFF;
/* Bytes 10,11: Extended selftest completion time = 0x0000 */
- return SNTI_TRANSLATION_SUCCESS;
+ return 0;
}
static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr,
u8 *resp, int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
u32 feature_resp;
u8 vwc;
if (len < MODE_PAGE_CACHING_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
&feature_resp);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out;
- if (nvme_sc) {
- res = nvme_sc;
- goto out;
- }
+ return res;
+
vwc = feature_resp & 0x00000001;
resp[0] = MODE_PAGE_CACHING;
resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
resp[2] = vwc << 2;
-
- out:
- return res;
+ return 0;
}
static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *resp,
int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
-
if (len < MODE_PAGE_POW_CND_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_POWER_CONDITION;
resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
/* All other bytes are zero */
- return res;
+ return 0;
}
static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *resp,
int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
-
if (len < MODE_PAGE_INF_EXC_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_INFO_EXCEP;
resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
resp[2] = 0x88;
/* All other bytes are zero */
- return res;
+ return 0;
}
static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *resp, int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u16 mode_pages_offset_1 = 0;
u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
@@ -1352,23 +1077,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
MODE_PAGE_CACHING_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
+ if (res)
+ return res;
res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
MODE_PAGE_CONTROL_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
+ if (res)
+ return res;
res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
MODE_PAGE_POW_CND_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
- res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
+ if (res)
+ return res;
+ return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
MODE_PAGE_INF_EXC_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
-
- out:
- return res;
}
static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
@@ -1389,7 +1109,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *, int),
u16 mode_pages_tot_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *response;
u8 dbd, llbaa;
@@ -1398,9 +1118,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
u16 mode_pages_offset_1;
u16 blk_desc_len, blk_desc_offset, mode_data_length;
- dbd = GET_MODE_SENSE_DBD(cmd);
- llbaa = GET_MODE_SENSE_LLBAA(cmd);
- mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10);
+ dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
+ llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
+ mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
+
blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
@@ -1418,18 +1139,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
llbaa, mode_data_length, blk_desc_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
if (blk_desc_len > 0) {
res = nvme_trans_fill_blk_desc(ns, hdr,
&response[blk_desc_offset],
blk_desc_len, llbaa);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
}
res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
mode_pages_tot_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
xfer_len = min(alloc_len, resp_size);
@@ -1484,33 +1205,20 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 pc, u8 pcmod, u8 start)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
int lowest_pow_st; /* max npss = lowest power consumption */
unsigned ps_desired = 0;
- /* NVMe Controller Identify */
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ctrl),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ctrl = mem;
+ return res;
+
lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
+ kfree(id_ctrl);
switch (pc) {
case NVME_POWER_STATE_START_VALID:
@@ -1550,79 +1258,48 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_dma;
- if (nvme_sc)
- res = nvme_sc;
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
- dma_addr);
- out:
- return res;
+ return nvme_trans_status_code(hdr, nvme_sc);
}
-/* Write Buffer Helper Functions */
-/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */
+static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 buffer_id)
+{
+ struct nvme_command c;
+ int nvme_sc;
-static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_admin_activate_fw;
+ c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
+
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 opcode, u32 tot_len, u32 offset,
u8 buffer_id)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
struct nvme_command c;
- struct nvme_iod *iod = NULL;
- unsigned length;
- memset(&c, 0, sizeof(c));
- c.common.opcode = opcode;
- if (opcode == nvme_admin_download_fw) {
- if (hdr->iovec_count > 0) {
- /* Assuming SGL is not allowed for this command */
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
- (unsigned long)hdr->dxferp, tot_len);
- if (IS_ERR(iod)) {
- res = PTR_ERR(iod);
- goto out;
- }
- length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
- if (length != tot_len) {
- res = -ENOMEM;
- goto out_unmap;
- }
-
- c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
- c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
- c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
- } else if (opcode == nvme_admin_activate_fw) {
- u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV;
- c.common.cdw10[0] = cpu_to_le32(cdw10);
+ if (hdr->iovec_count > 0) {
+ /* Assuming SGL is not allowed for this command */
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST,
+ SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
}
- nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_unmap;
- if (nvme_sc)
- res = nvme_sc;
-
- out_unmap:
- if (opcode == nvme_admin_download_fw) {
- nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
- nvme_free_iod(dev, iod);
- }
- out:
- return res;
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_admin_download_fw;
+ c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
+ c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
+
+ nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+ hdr->dxferp, tot_len, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
}
/* Mode Select Helper Functions */
@@ -1685,7 +1362,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *mode_page, u8 page_code)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
unsigned dword11;
@@ -1696,12 +1373,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
0, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- break;
- if (nvme_sc) {
- res = nvme_sc;
- break;
- }
break;
case MODE_PAGE_CONTROL:
break;
@@ -1713,8 +1384,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
ILLEGAL_REQUEST,
SCSI_ASC_INVALID_PARAMETER,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- if (!res)
- res = SNTI_INTERNAL_ERROR;
break;
}
break;
@@ -1722,8 +1391,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- if (!res)
- res = SNTI_INTERNAL_ERROR;
break;
}
@@ -1734,7 +1401,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd, u16 parm_list_len, u8 pf,
u8 sp, u8 cdb10)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 *parm_list;
u16 bd_len;
u8 llbaa = 0;
@@ -1750,7 +1417,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_mem;
nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
@@ -1788,7 +1455,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
mp_size = parm_list[index + 1] + 2;
res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
page_code);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
break;
index += mp_size;
} while (index < parm_list_len);
@@ -1804,12 +1471,9 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
struct sg_io_hdr *hdr)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
- struct nvme_id_ns *id_ns;
u8 flbas;
/*
@@ -1820,22 +1484,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
*/
if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ struct nvme_id_ns *id_ns;
+
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
if (ns->mode_select_num_blocks == 0)
ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
@@ -1844,18 +1498,17 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
ns->mode_select_block_len =
(1 << (id_ns->lbaf[flbas].ds));
}
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- mem, dma_addr);
+
+ kfree(id_ns);
}
- out:
- return res;
+
+ return 0;
}
static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
u8 format_prot_info, u8 *nvme_pf_code)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 *parm_list;
u8 pf_usage, pf_code;
@@ -1865,7 +1518,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
goto out;
}
res = nvme_trans_copy_from_user(hdr, parm_list, len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_mem;
if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
@@ -1915,11 +1568,9 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 prot_info)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 i;
u8 flbas, nlbaf;
@@ -1928,22 +1579,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_command c;
/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
+
flbas = (id_ns->flbas) & 0x0F;
nlbaf = id_ns->nlbaf;
@@ -1971,69 +1611,13 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.format.nsid = cpu_to_le32(ns->ns_id);
c.format.cdw10 = cpu_to_le32(cdw10);
- nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+ nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_dma;
- if (nvme_sc)
- res = nvme_sc;
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ kfree(id_ns);
return res;
}
-/* Read/Write Helper Functions */
-
-static inline void nvme_trans_get_io_cdb6(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = 0;
- cdb_info->prot_info = 0;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
- IO_6_CDB_LBA_MASK;
- cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
-
- /* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
- if (cdb_info->xfer_len == 0)
- cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
-}
-
-static inline void nvme_trans_get_io_cdb10(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb12(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb16(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
-}
-
static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
struct nvme_trans_io_cdb *cdb_info,
u32 max_blocks)
@@ -2063,11 +1647,8 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_trans_io_cdb *cdb_info, u8 is_write)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- int nvme_sc;
- struct nvme_dev *dev = ns->dev;
+ int nvme_sc = NVME_SC_SUCCESS;
u32 num_cmds;
- struct nvme_iod *iod;
u64 unit_len;
u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
u32 retcode;
@@ -2118,45 +1699,20 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
control = nvme_trans_io_get_control(ns, cdb_info);
c.rw.control = cpu_to_le16(control);
- iod = nvme_map_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- (unsigned long)next_mapping_addr, unit_len);
- if (IS_ERR(iod)) {
- res = PTR_ERR(iod);
- goto out;
- }
- retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
- if (retcode != unit_len) {
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
- res = -ENOMEM;
- goto out;
+ if (get_capacity(ns->disk) - unit_num_blocks <
+ cdb_info->lba + nvme_offset) {
+ nvme_sc = NVME_SC_LBA_RANGE;
+ break;
}
- c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.rw.prp2 = cpu_to_le64(iod->first_dma);
+ nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ next_mapping_addr, unit_len, NULL, 0);
+ if (nvme_sc)
+ break;
nvme_offset += unit_num_blocks;
-
- nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
- if (nvme_sc != NVME_SC_SUCCESS) {
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
- res = nvme_trans_status_code(hdr, nvme_sc);
- goto out;
- }
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
}
- res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
- out:
- return res;
+ return nvme_trans_status_code(hdr, nvme_sc);
}
@@ -2165,8 +1721,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- struct nvme_trans_io_cdb cdb_info;
+ int res = 0;
+ struct nvme_trans_io_cdb cdb_info = { 0, };
u8 opcode = cmd[0];
u64 xfer_bytes;
u64 sum_iov_len = 0;
@@ -2174,27 +1730,52 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
int i;
size_t not_copied;
- /* Extract Fields from CDB */
+ /*
+ * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
+ * but always in the same place for all others.
+ */
+ switch (opcode) {
+ case WRITE_6:
+ case READ_6:
+ break;
+ default:
+ cdb_info.fua = cmd[1] & 0x8;
+ cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
+ if (cdb_info.prot_info && !ns->pi_type) {
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST,
+ SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ }
+ }
+
switch (opcode) {
case WRITE_6:
case READ_6:
- nvme_trans_get_io_cdb6(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be24(&cmd[1]);
+ cdb_info.xfer_len = cmd[4];
+ if (cdb_info.xfer_len == 0)
+ cdb_info.xfer_len = 256;
break;
case WRITE_10:
case READ_10:
- nvme_trans_get_io_cdb10(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be32(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
break;
case WRITE_12:
case READ_12:
- nvme_trans_get_io_cdb12(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be32(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
break;
case WRITE_16:
case READ_16:
- nvme_trans_get_io_cdb16(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be64(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
break;
default:
/* Will never really reach here */
- res = SNTI_INTERNAL_ERROR;
+ res = -EIO;
goto out;
}
@@ -2236,7 +1817,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
/* Send NVMe IO Command(s) */
res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
out:
@@ -2246,17 +1827,18 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u8 evpd;
u8 page_code;
int alloc_len;
u8 *inq_response;
- evpd = GET_INQ_EVPD_BIT(cmd);
- page_code = GET_INQ_PAGE_CODE(cmd);
- alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
+ evpd = cmd[1] & 0x01;
+ page_code = cmd[2];
+ alloc_len = get_unaligned_be16(&cmd[3]);
- inq_response = kmalloc(alloc_len, GFP_KERNEL);
+ inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
+ GFP_KERNEL);
if (inq_response == NULL) {
res = -ENOMEM;
goto out_mem;
@@ -2314,29 +1896,27 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u16 alloc_len;
- u8 sp;
u8 pc;
u8 page_code;
- sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET);
- if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) {
+ if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET);
- page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK;
- pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
+
+ page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
+ pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET);
+ alloc_len = get_unaligned_be16(&cmd[7]);
switch (page_code) {
case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
@@ -2361,24 +1941,18 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
u8 cdb10 = 0;
u16 parm_list_len;
u8 page_format;
u8 save_pages;
- page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET);
- page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+ page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+ save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
- save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET);
- save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
- if (GET_OPCODE(cmd) == MODE_SELECT) {
- parm_list_len = GET_U8_FROM_CDB(cmd,
- MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET);
+ if (cmd[0] == MODE_SELECT) {
+ parm_list_len = cmd[4];
} else {
- parm_list_len = GET_U16_FROM_CDB(cmd,
- MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET);
+ parm_list_len = cmd[7];
cdb10 = 1;
}
@@ -2387,42 +1961,36 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
* According to SPC-4 r24, a paramter list length field of 0
* shall not be considered an error
*/
- res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
+ return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
page_format, save_pages, cdb10);
}
- return res;
+ return 0;
}
static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u16 alloc_len;
u8 cdb10 = 0;
- u8 page_code;
- u8 pc;
- if (GET_OPCODE(cmd) == MODE_SENSE) {
- alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET);
+ if (cmd[0] == MODE_SENSE) {
+ alloc_len = cmd[4];
} else {
- alloc_len = GET_U16_FROM_CDB(cmd,
- MODE_SENSE10_ALLOC_LEN_OFFSET);
+ alloc_len = get_unaligned_be16(&cmd[7]);
cdb10 = 1;
}
- pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) &
- MODE_SENSE_PAGE_CONTROL_MASK;
- if (pc != MODE_SENSE_PC_CURRENT_VALUES) {
+ if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
+ MODE_SENSE_PC_CURRENT_VALUES) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) &
- MODE_SENSE_PAGE_CODE_MASK;
- switch (page_code) {
+ switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
case MODE_PAGE_CACHING:
res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
cdb10,
@@ -2465,47 +2033,34 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
+ u8 *cmd, u8 cdb16)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
- u32 alloc_len = READ_CAP_10_RESP_SIZE;
- u32 resp_size = READ_CAP_10_RESP_SIZE;
+ u32 alloc_len;
+ u32 resp_size;
u32 xfer_len;
- u8 cdb16;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 *response;
- cdb16 = IS_READ_CAP_16(cmd);
if (cdb16) {
- alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd);
+ alloc_len = get_unaligned_be32(&cmd[10]);
resp_size = READ_CAP_16_RESP_SIZE;
+ } else {
+ alloc_len = READ_CAP_10_RESP_SIZE;
+ resp_size = READ_CAP_10_RESP_SIZE;
}
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) {
res = -ENOMEM;
- goto out_dma;
+ goto out_free_id;
}
nvme_trans_fill_read_cap(response, id_ns, cdb16);
@@ -2513,72 +2068,53 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_copy_to_user(hdr, response, xfer_len);
kfree(response);
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ out_free_id:
+ kfree(id_ns);
return res;
}
static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
u32 alloc_len, xfer_len, resp_size;
- u8 select_report;
u8 *response;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
u32 ll_length, lun_id;
u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
__be32 tmp_len;
- alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd);
- select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET);
-
- if ((select_report != ALL_LUNS_RETURNED) &&
- (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) &&
- (select_report != RESTRICTED_LUNS_RETURNED)) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ switch (cmd[2]) {
+ default:
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- } else {
- /* NVMe Controller Identify */
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ctrl),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ case ALL_LUNS_RETURNED:
+ case ALL_WELL_KNOWN_LUNS_RETURNED:
+ case RESTRICTED_LUNS_RETURNED:
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ctrl = mem;
+ return res;
+
ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
resp_size = ll_length + LUN_DATA_HEADER_SIZE;
+ alloc_len = get_unaligned_be32(&cmd[6]);
if (alloc_len < resp_size) {
res = nvme_trans_completion(hdr,
SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_dma;
+ goto out_free_id;
}
response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) {
res = -ENOMEM;
- goto out_dma;
+ goto out_free_id;
}
/* The first LUN ID will always be 0 per the SAM spec */
@@ -2599,24 +2135,21 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_copy_to_user(hdr, response, xfer_len);
kfree(response);
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
- dma_addr);
- out:
+ out_free_id:
+ kfree(id_ctrl);
return res;
}
static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 alloc_len, xfer_len, resp_size;
u8 desc_format;
u8 *response;
- alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd);
- desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET);
- desc_format &= REQUEST_SENSE_DESC_MASK;
+ desc_format = cmd[1] & 0x01;
+ alloc_len = cmd[4];
resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
(FIXED_FMT_SENSE_DATA_SIZE));
@@ -2626,7 +2159,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out;
}
- if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
+ if (desc_format) {
/* Descriptor Format Sense Data */
response[0] = DESC_FORMAT_SENSE_DATA;
response[1] = NO_SENSE;
@@ -2665,95 +2198,58 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
}
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
+static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
+ struct sg_io_hdr *hdr)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_command c;
- u8 immed, pcmod, pc, no_flush, start;
- immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET);
- pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET);
- pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET);
- no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET);
- start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET);
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_cmd_flush;
+ c.common.nsid = cpu_to_le32(ns->ns_id);
- immed &= START_STOP_UNIT_CDB_IMMED_MASK;
- pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK;
- pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT;
- no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK;
- start &= START_STOP_UNIT_CDB_START_MASK;
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *cmd)
+{
+ u8 immed, pcmod, pc, no_flush, start;
+
+ immed = cmd[1] & 0x01;
+ pcmod = cmd[3] & 0x0f;
+ pc = (cmd[4] & 0xf0) >> 4;
+ no_flush = cmd[4] & 0x04;
+ start = cmd[4] & 0x01;
if (immed != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
} else {
if (no_flush == 0) {
/* Issue NVME FLUSH command prior to START STOP UNIT */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
+ int res = nvme_trans_synchronize_cache(ns, hdr);
if (res)
- goto out;
- if (nvme_sc) {
- res = nvme_sc;
- goto out;
- }
+ return res;
}
/* Setup the expected power state transition */
- res = nvme_trans_power_state(ns, hdr, pc, pcmod, start);
+ return nvme_trans_power_state(ns, hdr, pc, pcmod, start);
}
-
- out:
- return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *cmd)
-{
- int res = SNTI_TRANSLATION_SUCCESS;
- int nvme_sc;
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
-
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out;
- if (nvme_sc)
- res = nvme_sc;
-
- out:
- return res;
}
static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 parm_hdr_len = 0;
u8 nvme_pf_code = 0;
u8 format_prot_info, long_list, format_data;
- format_prot_info = GET_U8_FROM_CDB(cmd,
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET);
- long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET);
- format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET);
-
- format_prot_info = (format_prot_info &
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >>
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT;
- long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK;
- format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK;
+ format_prot_info = (cmd[1] & 0xc0) >> 6;
+ long_list = cmd[1] & 0x20;
+ format_data = cmd[1] & 0x10;
if (format_data != 0) {
if (format_prot_info != 0) {
@@ -2777,16 +2273,16 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
if (parm_hdr_len > 0) {
res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
format_prot_info, &nvme_pf_code);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
}
/* Attempt to activate any previously downloaded firmware image */
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
/* Determine Block size and count and send format command */
res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
@@ -2799,28 +2295,24 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
struct nvme_dev *dev = ns->dev;
if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
NOT_READY, SCSI_ASC_LUN_NOT_READY,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
else
- res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-
- return res;
+ return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
}
static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u32 buffer_offset, parm_list_length;
u8 buffer_id, mode;
- parm_list_length =
- GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET);
+ parm_list_length = get_unaligned_be24(&cmd[6]);
if (parm_list_length % BYTES_TO_DWORDS != 0) {
/* NVMe expects Firmware file to be a whole number of DWORDS */
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2828,38 +2320,32 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET);
+ buffer_id = cmd[2];
if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) &
- WRITE_BUFFER_CDB_MODE_MASK;
- buffer_offset =
- GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET);
+ mode = cmd[1] & 0x1f;
+ buffer_offset = get_unaligned_be24(&cmd[3]);
switch (mode) {
case DOWNLOAD_SAVE_ACTIVATE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+ res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
parm_list_length, buffer_offset,
buffer_id);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
- parm_list_length, buffer_offset,
- buffer_id);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
break;
case DOWNLOAD_SAVE_DEFER_ACTIVATE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+ res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
parm_list_length, buffer_offset,
buffer_id);
break;
case ACTIVATE_DEFERRED_MICROCODE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
- parm_list_length, buffer_offset,
- buffer_id);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
break;
default:
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2888,15 +2374,13 @@ struct scsi_unmap_parm_list {
static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- struct nvme_dev *dev = ns->dev;
struct scsi_unmap_parm_list *plist;
struct nvme_dsm_range *range;
struct nvme_command c;
- int i, nvme_sc, res = -ENOMEM;
+ int i, nvme_sc, res;
u16 ndesc, list_len;
- dma_addr_t dma_addr;
- list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET);
+ list_len = get_unaligned_be16(&cmd[7]);
if (!list_len)
return -EINVAL;
@@ -2905,7 +2389,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
return -ENOMEM;
res = nvme_trans_copy_from_user(hdr, plist, list_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
@@ -2914,10 +2398,11 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out;
}
- range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
- &dma_addr, GFP_KERNEL);
- if (!range)
+ range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ res = -ENOMEM;
goto out;
+ }
for (i = 0; i < ndesc; i++) {
range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
@@ -2928,15 +2413,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
memset(&c, 0, sizeof(c));
c.dsm.opcode = nvme_cmd_dsm;
c.dsm.nsid = cpu_to_le32(ns->ns_id);
- c.dsm.prp1 = cpu_to_le64(dma_addr);
c.dsm.nr = cpu_to_le32(ndesc - 1);
c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
+ ndesc * sizeof(*range));
res = nvme_trans_status_code(hdr, nvme_sc);
- dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
- range, dma_addr);
+ kfree(range);
out:
kfree(plist);
return res;
@@ -2991,13 +2475,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
retcode = nvme_trans_mode_sense(ns, hdr, cmd);
break;
case READ_CAPACITY:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+ retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
break;
case SERVICE_ACTION_IN_16:
- if (IS_READ_CAP_16(cmd))
- retcode = nvme_trans_read_capacity(ns, hdr, cmd);
- else
+ switch (cmd[1]) {
+ case SAI_READ_CAPACITY_16:
+ retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
+ break;
+ default:
goto out;
+ }
break;
case REPORT_LUNS:
retcode = nvme_trans_report_luns(ns, hdr, cmd);
@@ -3013,7 +2500,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
retcode = nvme_trans_start_stop(ns, hdr, cmd);
break;
case SYNCHRONIZE_CACHE:
- retcode = nvme_trans_synchronize_cache(ns, hdr, cmd);
+ retcode = nvme_trans_synchronize_cache(ns, hdr);
break;
case FORMAT_UNIT:
retcode = nvme_trans_format_unit(ns, hdr, cmd);
@@ -3051,15 +2538,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
if (hdr.cmd_len > BLK_MAX_CDB)
return -EINVAL;
+ /*
+ * A positive return code means an NVMe status, which has been
+ * translated to sense data.
+ */
retcode = nvme_scsi_translate(ns, &hdr);
if (retcode < 0)
return retcode;
- if (retcode > 0)
- retcode = SNTI_TRANSLATION_SUCCESS;
if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
return -EFAULT;
-
- return retcode;
+ return 0;
}
int nvme_sg_get_version_num(int __user *ip)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d48715b287e6..dbb4da1cdca8 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -442,7 +442,7 @@ static char *pd_buf; /* buffer for request in progress */
static enum action do_pd_io_start(void)
{
- if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
+ if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) {
phase = pd_special;
return pd_special();
}
@@ -725,7 +725,7 @@ static int pd_special_command(struct pd_unit *disk,
if (IS_ERR(rq))
return PTR_ERR(rq);
- rq->cmd_type = REQ_TYPE_SPECIAL;
+ rq->cmd_type = REQ_TYPE_DRV_PRIV;
rq->special = func;
err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 09e628dafd9d..4c20c228184c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -61,6 +61,7 @@
#include <linux/freezer.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/backing-dev.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/scsi.h>
diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c
index eabf4a8d0085..095dfaadcaa5 100644
--- a/drivers/block/pmem.c
+++ b/drivers/block/pmem.c
@@ -139,11 +139,11 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
}
/*
- * Map the memory as non-cachable, as we can't write back the contents
+ * Map the memory as write-through, as we can't write back the contents
* of the CPU caches in case of a crash.
*/
err = -ENOMEM;
- pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+ pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size);
if (!pmem->virt_addr)
goto out_release_region;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index ef45cfb98fd2..b1612eb16172 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -1,5 +1,5 @@
/*
- * ps3vram - Use extra PS3 video ram as MTD block device.
+ * ps3vram - Use extra PS3 video ram as block device.
*
* Copyright 2009 Sony Corporation
*
@@ -73,8 +73,8 @@ struct ps3vram_priv {
u64 memory_handle;
u64 context_handle;
- u32 *ctrl;
- void *reports;
+ u32 __iomem *ctrl;
+ void __iomem *reports;
u8 *xdr_buf;
u32 *fifo_base;
@@ -104,7 +104,7 @@ static char *size = "256M";
module_param(size, charp, 0);
MODULE_PARM_DESC(size, "memory size");
-static u32 *ps3vram_get_notifier(void *reports, int notifier)
+static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier)
{
return reports + DMA_NOTIFIER_OFFSET_BASE +
DMA_NOTIFIER_SIZE * notifier;
@@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier)
static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
int i;
for (i = 0; i < 4; i++)
- notify[i] = 0xffffffff;
+ iowrite32be(0xffffffff, notify + i);
}
static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
unsigned int timeout_ms)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
unsigned long timeout;
for (timeout = 20; timeout; timeout--) {
- if (!notify[3])
+ if (!ioread32be(notify + 3))
return 0;
udelay(10);
}
@@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
- if (!notify[3])
+ if (!ioread32be(notify + 3))
return 0;
msleep(1);
} while (time_before(jiffies, timeout));
@@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
- priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET);
}
static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
@@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
- if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+ if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET))
return 0;
msleep(1);
} while (time_before(jiffies, timeout));
dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
- priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
- priv->ctrl[CTRL_TOP]);
+ ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET),
+ ioread32be(priv->ctrl + CTRL_TOP));
return -ETIMEDOUT;
}
@@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
/* asking the HV for a blit will kick the FIFO */
status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
@@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
mutex_lock(&ps3_gpu_mutex);
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
- (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+ iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base)
+ * sizeof(u32), priv->ctrl + CTRL_PUT);
/* asking the HV for a blit will kick the FIFO */
status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 812523330a78..ec6c5c6e1ac9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2264,6 +2264,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
result, xferred);
if (!img_request->result)
img_request->result = result;
+ /*
+ * Need to end I/O on the entire obj_request worth of
+ * bytes in case of error.
+ */
+ xferred = obj_request->length;
}
/* Image object requests don't own their page array */
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 5d552857de41..59c91d49b14b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
spin_unlock_irq(&host->lock);
DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+ crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
crq->rq->special = crq;
blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
@@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
crq->msg_bucket = (u32) rc;
DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+ crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
crq->rq->special = crq;
blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5ea2f0bbbc7c..d4d05f064d39 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req)
req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
- } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+ } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
req->errors = (error != 0);
}
@@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
break;
- case REQ_TYPE_SPECIAL:
+ case REQ_TYPE_DRV_PRIV:
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
@@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
return PTR_ERR(req);
}
- req->cmd_type = REQ_TYPE_SPECIAL;
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
blk_put_request(req);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index bd2b3bbbb22c..713fc9ff1149 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -265,17 +265,6 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
atomic_dec(&blkif->persistent_gnt_in_use);
}
-static void free_persistent_gnts_unmap_callback(int result,
- struct gntab_unmap_queue_data *data)
-{
- struct completion *c = data->data;
-
- /* BUG_ON used to reproduce existing behaviour,
- but is this the best way to deal with this? */
- BUG_ON(result);
- complete(c);
-}
-
static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unsigned int num)
{
@@ -285,12 +274,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
struct rb_node *n;
int segs_to_unmap = 0;
struct gntab_unmap_queue_data unmap_data;
- struct completion unmap_completion;
- init_completion(&unmap_completion);
-
- unmap_data.data = &unmap_completion;
- unmap_data.done = &free_persistent_gnts_unmap_callback;
unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;
@@ -310,8 +294,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
!rb_next(&persistent_gnt->node)) {
unmap_data.count = segs_to_unmap;
- gnttab_unmap_refs_async(&unmap_data);
- wait_for_completion(&unmap_completion);
+ BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0;
@@ -329,8 +312,13 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
- int ret, segs_to_unmap = 0;
+ int segs_to_unmap = 0;
struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+ struct gntab_unmap_queue_data unmap_data;
+
+ unmap_data.pages = pages;
+ unmap_data.unmap_ops = unmap;
+ unmap_data.kunmap_ops = NULL;
while(!list_empty(&blkif->persistent_purge_list)) {
persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
@@ -346,17 +334,16 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
pages[segs_to_unmap] = persistent_gnt->page;
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- ret = gnttab_unmap_refs(unmap, NULL, pages,
- segs_to_unmap);
- BUG_ON(ret);
+ unmap_data.count = segs_to_unmap;
+ BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0;
}
kfree(persistent_gnt);
}
if (segs_to_unmap > 0) {
- ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
- BUG_ON(ret);
+ unmap_data.count = segs_to_unmap;
+ BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
put_free_pages(blkif, pages, segs_to_unmap);
}
}
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 6489c0fd0ea6..386ba3d1a6ee 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -23,12 +23,4 @@ config ZRAM_LZ4_COMPRESS
default n
help
This option enables LZ4 compression algorithm support. Compression
- algorithm can be changed using `comp_algorithm' device attribute.
-
-config ZRAM_DEBUG
- bool "Compressed RAM block device debug support"
- depends on ZRAM
- default n
- help
- This option adds additional debugging code to the compressed
- RAM block device driver.
+ algorithm can be changed using `comp_algorithm' device attribute.
\ No newline at end of file
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index f1ff39a3d1c1..965d1afb0eaa 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -274,7 +274,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
int i = 0;
while (backends[i]) {
- if (sysfs_streq(comp, backends[i]->name))
+ if (!strcmp(comp, backends[i]->name))
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
"[%s] ", backends[i]->name);
else
@@ -286,6 +286,11 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
return sz;
}
+bool zcomp_available_algorithm(const char *comp)
+{
+ return find_backend(comp) != NULL;
+}
+
bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
{
return comp->set_max_streams(comp, num_strm);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index c59d1fca72c0..46e2b9f8f1f0 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -51,6 +51,7 @@ struct zcomp {
};
ssize_t zcomp_available_show(const char *comp, char *buf);
+bool zcomp_available_algorithm(const char *comp);
struct zcomp *zcomp_create(const char *comp, int max_strm);
void zcomp_destroy(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c94386aa563d..fb655e8d1e3b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -15,10 +15,6 @@
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#ifdef CONFIG_ZRAM_DEBUG
-#define DEBUG
-#endif
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
@@ -32,12 +28,16 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/sysfs.h>
#include "zram_drv.h"
-/* Globals */
+static DEFINE_IDR(zram_index_idr);
+/* idr index must be protected */
+static DEFINE_MUTEX(zram_index_mutex);
+
static int zram_major;
-static struct zram *zram_devices;
static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
@@ -53,7 +53,7 @@ static inline void deprecated_attr_warn(const char *name)
}
#define ZRAM_ATTR_RO(name) \
-static ssize_t name##_show(struct device *d, \
+static ssize_t name##_show(struct device *d, \
struct device_attribute *attr, char *b) \
{ \
struct zram *zram = dev_to_zram(d); \
@@ -74,12 +74,117 @@ static inline struct zram *dev_to_zram(struct device *dev)
return (struct zram *)dev_to_disk(dev)->private_data;
}
-static ssize_t disksize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+/* flag operations require table entry bit_spin_lock() being held */
+static int zram_test_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
{
- struct zram *zram = dev_to_zram(dev);
+ return meta->table[index].value & BIT(flag);
+}
- return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+static void zram_set_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value |= BIT(flag);
+}
+
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+ return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+ u32 index, size_t size)
+{
+ unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+ meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline int valid_io_request(struct zram *zram,
+ sector_t start, unsigned int size)
+{
+ u64 end, bound;
+
+ /* unaligned request */
+ if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ return 0;
+ if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ return 0;
+
+ end = start + (size >> SECTOR_SHIFT);
+ bound = zram->disksize >> SECTOR_SHIFT;
+ /* out of range */
+ if (unlikely(start >= bound || end > bound || start > end))
+ return 0;
+
+ /* I/O request is valid */
+ return 1;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
+static inline void update_used_max(struct zram *zram,
+ const unsigned long pages)
+{
+ unsigned long old_max, cur_max;
+
+ old_max = atomic_long_read(&zram->stats.max_used_pages);
+
+ do {
+ cur_max = old_max;
+ if (pages > cur_max)
+ old_max = atomic_long_cmpxchg(
+ &zram->stats.max_used_pages, cur_max, pages);
+ } while (old_max != cur_max);
+}
+
+static int page_zero_filled(void *ptr)
+{
+ unsigned int pos;
+ unsigned long *page;
+
+ page = (unsigned long *)ptr;
+
+ for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
+ if (page[pos])
+ return 0;
+ }
+
+ return 1;
+}
+
+static void handle_zero_page(struct bio_vec *bvec)
+{
+ struct page *page = bvec->bv_page;
+ void *user_mem;
+
+ user_mem = kmap_atomic(page);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
+ kunmap_atomic(user_mem);
+
+ flush_dcache_page(page);
}
static ssize_t initstate_show(struct device *dev,
@@ -95,6 +200,14 @@ static ssize_t initstate_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
+static ssize_t disksize_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+}
+
static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -122,19 +235,6 @@ static ssize_t mem_used_total_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}
-static ssize_t max_comp_streams_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- int val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- val = zram->max_comp_streams;
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
-}
-
static ssize_t mem_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -204,6 +304,19 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+static ssize_t max_comp_streams_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = zram->max_comp_streams;
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -250,6 +363,8 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
+ size_t sz;
+
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
@@ -257,69 +372,108 @@ static ssize_t comp_algorithm_store(struct device *dev,
return -EBUSY;
}
strlcpy(zram->compressor, buf, sizeof(zram->compressor));
+
+ /* ignore trailing newline */
+ sz = strlen(zram->compressor);
+ if (sz > 0 && zram->compressor[sz - 1] == '\n')
+ zram->compressor[sz - 1] = 0x00;
+
+ if (!zcomp_available_algorithm(zram->compressor))
+ len = -EINVAL;
+
up_write(&zram->init_lock);
return len;
}
-/* flag operations needs meta->tb_lock */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
+static ssize_t compact_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- return meta->table[index].value & BIT(flag);
-}
+ unsigned long nr_migrated;
+ struct zram *zram = dev_to_zram(dev);
+ struct zram_meta *meta;
-static void zram_set_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value |= BIT(flag);
-}
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value &= ~BIT(flag);
+ meta = zram->meta;
+ nr_migrated = zs_compact(meta->mem_pool);
+ atomic64_add(nr_migrated, &zram->stats.num_migrated);
+ up_read(&zram->init_lock);
+
+ return len;
}
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static ssize_t io_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8llu\n",
+ (u64)atomic64_read(&zram->stats.failed_reads),
+ (u64)atomic64_read(&zram->stats.failed_writes),
+ (u64)atomic64_read(&zram->stats.invalid_io),
+ (u64)atomic64_read(&zram->stats.notify_free));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static void zram_set_obj_size(struct zram_meta *meta,
- u32 index, size_t size)
+static ssize_t mm_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+ struct zram *zram = dev_to_zram(dev);
+ u64 orig_size, mem_used = 0;
+ long max_used;
+ ssize_t ret;
- meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ down_read(&zram->init_lock);
+ if (init_done(zram))
+ mem_used = zs_get_total_pages(zram->meta->mem_pool);
+
+ orig_size = atomic64_read(&zram->stats.pages_stored);
+ max_used = atomic_long_read(&zram->stats.max_used_pages);
+
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
+ orig_size << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.compr_data_size),
+ mem_used << PAGE_SHIFT,
+ zram->limit_pages << PAGE_SHIFT,
+ max_used << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.zero_pages),
+ (u64)atomic64_read(&zram->stats.num_migrated));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static inline int is_partial_io(struct bio_vec *bvec)
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+ZRAM_ATTR_RO(num_reads);
+ZRAM_ATTR_RO(num_writes);
+ZRAM_ATTR_RO(failed_reads);
+ZRAM_ATTR_RO(failed_writes);
+ZRAM_ATTR_RO(invalid_io);
+ZRAM_ATTR_RO(notify_free);
+ZRAM_ATTR_RO(zero_pages);
+ZRAM_ATTR_RO(compr_data_size);
+
+static inline bool zram_meta_get(struct zram *zram)
{
- return bvec->bv_len != PAGE_SIZE;
+ if (atomic_inc_not_zero(&zram->refcount))
+ return true;
+ return false;
}
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram,
- sector_t start, unsigned int size)
+static inline void zram_meta_put(struct zram *zram)
{
- u64 end, bound;
-
- /* unaligned request */
- if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
- return 0;
- if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
- return 0;
-
- end = start + (size >> SECTOR_SHIFT);
- bound = zram->disksize >> SECTOR_SHIFT;
- /* out of range range */
- if (unlikely(start >= bound || end > bound || start > end))
- return 0;
-
- /* I/O request is valid */
- return 1;
+ atomic_dec(&zram->refcount);
}
static void zram_meta_free(struct zram_meta *meta, u64 disksize)
@@ -373,56 +527,6 @@ out_error:
return NULL;
}
-static inline bool zram_meta_get(struct zram *zram)
-{
- if (atomic_inc_not_zero(&zram->refcount))
- return true;
- return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
- atomic_dec(&zram->refcount);
-}
-
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
-static int page_zero_filled(void *ptr)
-{
- unsigned int pos;
- unsigned long *page;
-
- page = (unsigned long *)ptr;
-
- for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
- if (page[pos])
- return 0;
- }
-
- return 1;
-}
-
-static void handle_zero_page(struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- else
- clear_page(user_mem);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
-}
-
-
/*
* To protect concurrent access to the same index entry,
* caller should hold this table index entry's bit_spinlock to
@@ -540,21 +644,6 @@ out_cleanup:
return ret;
}
-static inline void update_used_max(struct zram *zram,
- const unsigned long pages)
-{
- unsigned long old_max, cur_max;
-
- old_max = atomic_long_read(&zram->stats.max_used_pages);
-
- do {
- cur_max = old_max;
- if (pages > cur_max)
- old_max = atomic_long_cmpxchg(
- &zram->stats.max_used_pages, cur_max, pages);
- } while (old_max != cur_max);
-}
-
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset)
{
@@ -564,8 +653,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
- struct zcomp_strm *zstrm;
- bool locked = false;
+ struct zcomp_strm *zstrm = NULL;
unsigned long alloced_pages;
page = bvec->bv_page;
@@ -585,7 +673,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zstrm = zcomp_strm_find(zram->comp);
- locked = true;
user_mem = kmap_atomic(page);
if (is_partial_io(bvec)) {
@@ -657,7 +744,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zcomp_strm_release(zram->comp, zstrm);
- locked = false;
+ zstrm = NULL;
zs_unmap_object(meta->mem_pool, handle);
/*
@@ -675,42 +762,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
atomic64_add(clen, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
out:
- if (locked)
+ if (zstrm)
zcomp_strm_release(zram->comp, zstrm);
if (is_partial_io(bvec))
kfree(uncmem);
return ret;
}
-static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, int rw)
-{
- unsigned long start_time = jiffies;
- int ret;
-
- generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
- &zram->disk->part0);
-
- if (rw == READ) {
- atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
- } else {
- atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
- }
-
- generic_end_io_acct(rw, &zram->disk->part0, start_time);
-
- if (unlikely(ret)) {
- if (rw == READ)
- atomic64_inc(&zram->stats.failed_reads);
- else
- atomic64_inc(&zram->stats.failed_writes);
- }
-
- return ret;
-}
-
/*
* zram_bio_discard - handler on discard request
* @index: physical block index in PAGE_SIZE units
@@ -750,149 +808,32 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
-static void zram_reset_device(struct zram *zram)
-{
- struct zram_meta *meta;
- struct zcomp *comp;
- u64 disksize;
-
- down_write(&zram->init_lock);
-
- zram->limit_pages = 0;
-
- if (!init_done(zram)) {
- up_write(&zram->init_lock);
- return;
- }
-
- meta = zram->meta;
- comp = zram->comp;
- disksize = zram->disksize;
- /*
- * Refcount will go down to 0 eventually and r/w handler
- * cannot handle further I/O so it will bail out by
- * check zram_meta_get.
- */
- zram_meta_put(zram);
- /*
- * We want to free zram_meta in process context to avoid
- * deadlock between reclaim path and any other locks.
- */
- wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
-
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
- zram->disksize = 0;
- zram->max_comp_streams = 1;
- set_capacity(zram->disk, 0);
-
- up_write(&zram->init_lock);
- /* I/O operation under all of CPU are done so let's free */
- zram_meta_free(meta, disksize);
- zcomp_destroy(comp);
-}
-
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- u64 disksize;
- struct zcomp *comp;
- struct zram_meta *meta;
- struct zram *zram = dev_to_zram(dev);
- int err;
-
- disksize = memparse(buf, NULL);
- if (!disksize)
- return -EINVAL;
-
- disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(zram->disk->first_minor, disksize);
- if (!meta)
- return -ENOMEM;
-
- comp = zcomp_create(zram->compressor, zram->max_comp_streams);
- if (IS_ERR(comp)) {
- pr_info("Cannot initialise %s compressing backend\n",
- zram->compressor);
- err = PTR_ERR(comp);
- goto out_free_meta;
- }
-
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- pr_info("Cannot change disksize for initialized device\n");
- err = -EBUSY;
- goto out_destroy_comp;
- }
-
- init_waitqueue_head(&zram->io_done);
- atomic_set(&zram->refcount, 1);
- zram->meta = meta;
- zram->comp = comp;
- zram->disksize = disksize;
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
-
- /*
- * Revalidate disk out of the init_lock to avoid lockdep splat.
- * It's okay because disk's capacity is protected by init_lock
- * so that revalidate_disk always sees up-to-date capacity.
- */
- revalidate_disk(zram->disk);
-
- return len;
-
-out_destroy_comp:
- up_write(&zram->init_lock);
- zcomp_destroy(comp);
-out_free_meta:
- zram_meta_free(meta, disksize);
- return err;
-}
-
-static ssize_t reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+ int offset, int rw)
{
+ unsigned long start_time = jiffies;
int ret;
- unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- if (!bdev)
- return -ENOMEM;
+ generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
+ &zram->disk->part0);
- mutex_lock(&bdev->bd_mutex);
- /* Do not reset an active device! */
- if (bdev->bd_openers) {
- ret = -EBUSY;
- goto out;
+ if (rw == READ) {
+ atomic64_inc(&zram->stats.num_reads);
+ ret = zram_bvec_read(zram, bvec, index, offset);
+ } else {
+ atomic64_inc(&zram->stats.num_writes);
+ ret = zram_bvec_write(zram, bvec, index, offset);
}
- ret = kstrtou16(buf, 10, &do_reset);
- if (ret)
- goto out;
+ generic_end_io_acct(rw, &zram->disk->part0, start_time);
- if (!do_reset) {
- ret = -EINVAL;
- goto out;
+ if (unlikely(ret)) {
+ if (rw == READ)
+ atomic64_inc(&zram->stats.failed_reads);
+ else
+ atomic64_inc(&zram->stats.failed_writes);
}
- /* Make sure all pending I/O is finished */
- fsync_bdev(bdev);
- zram_reset_device(zram);
-
- mutex_unlock(&bdev->bd_mutex);
- revalidate_disk(zram->disk);
- bdput(bdev);
-
- return len;
-
-out:
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
return ret;
}
@@ -1032,79 +973,185 @@ out:
return err;
}
-static const struct block_device_operations zram_devops = {
- .swap_slot_free_notify = zram_slot_free_notify,
- .rw_page = zram_rw_page,
- .owner = THIS_MODULE
-};
+static void zram_reset_device(struct zram *zram)
+{
+ struct zram_meta *meta;
+ struct zcomp *comp;
+ u64 disksize;
-static DEVICE_ATTR_RW(disksize);
-static DEVICE_ATTR_RO(initstate);
-static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
-static DEVICE_ATTR_RW(max_comp_streams);
-static DEVICE_ATTR_RW(comp_algorithm);
+ down_write(&zram->init_lock);
-static ssize_t io_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ zram->limit_pages = 0;
+
+ if (!init_done(zram)) {
+ up_write(&zram->init_lock);
+ return;
+ }
+
+ meta = zram->meta;
+ comp = zram->comp;
+ disksize = zram->disksize;
+ /*
+ * Refcount will go down to 0 eventually and r/w handler
+ * cannot handle further I/O so it will bail out by
+ * check zram_meta_get.
+ */
+ zram_meta_put(zram);
+ /*
+ * We want to free zram_meta in process context to avoid
+ * deadlock between reclaim path and any other locks.
+ */
+ wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
+
+ /* Reset stats */
+ memset(&zram->stats, 0, sizeof(zram->stats));
+ zram->disksize = 0;
+ zram->max_comp_streams = 1;
+
+ set_capacity(zram->disk, 0);
+ part_stat_set_all(&zram->disk->part0, 0);
+
+ up_write(&zram->init_lock);
+ /* I/O operation under all of CPU are done so let's free */
+ zram_meta_free(meta, disksize);
+ zcomp_destroy(comp);
+}
+
+static ssize_t disksize_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
+ u64 disksize;
+ struct zcomp *comp;
+ struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
- ssize_t ret;
+ int err;
- down_read(&zram->init_lock);
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8llu\n",
- (u64)atomic64_read(&zram->stats.failed_reads),
- (u64)atomic64_read(&zram->stats.failed_writes),
- (u64)atomic64_read(&zram->stats.invalid_io),
- (u64)atomic64_read(&zram->stats.notify_free));
- up_read(&zram->init_lock);
+ disksize = memparse(buf, NULL);
+ if (!disksize)
+ return -EINVAL;
- return ret;
+ disksize = PAGE_ALIGN(disksize);
+ meta = zram_meta_alloc(zram->disk->first_minor, disksize);
+ if (!meta)
+ return -ENOMEM;
+
+ comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ if (IS_ERR(comp)) {
+ pr_info("Cannot initialise %s compressing backend\n",
+ zram->compressor);
+ err = PTR_ERR(comp);
+ goto out_free_meta;
+ }
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Cannot change disksize for initialized device\n");
+ err = -EBUSY;
+ goto out_destroy_comp;
+ }
+
+ init_waitqueue_head(&zram->io_done);
+ atomic_set(&zram->refcount, 1);
+ zram->meta = meta;
+ zram->comp = comp;
+ zram->disksize = disksize;
+ set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+ up_write(&zram->init_lock);
+
+ /*
+ * Revalidate disk out of the init_lock to avoid lockdep splat.
+ * It's okay because disk's capacity is protected by init_lock
+ * so that revalidate_disk always sees up-to-date capacity.
+ */
+ revalidate_disk(zram->disk);
+
+ return len;
+
+out_destroy_comp:
+ up_write(&zram->init_lock);
+ zcomp_destroy(comp);
+out_free_meta:
+ zram_meta_free(meta, disksize);
+ return err;
}
-static ssize_t mm_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- struct zram *zram = dev_to_zram(dev);
- u64 orig_size, mem_used = 0;
- long max_used;
- ssize_t ret;
+ int ret;
+ unsigned short do_reset;
+ struct zram *zram;
+ struct block_device *bdev;
- down_read(&zram->init_lock);
- if (init_done(zram))
- mem_used = zs_get_total_pages(zram->meta->mem_pool);
+ ret = kstrtou16(buf, 10, &do_reset);
+ if (ret)
+ return ret;
- orig_size = atomic64_read(&zram->stats.pages_stored);
- max_used = atomic_long_read(&zram->stats.max_used_pages);
+ if (!do_reset)
+ return -EINVAL;
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
- orig_size << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.compr_data_size),
- mem_used << PAGE_SHIFT,
- zram->limit_pages << PAGE_SHIFT,
- max_used << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.zero_pages),
- (u64)atomic64_read(&zram->stats.num_migrated));
- up_read(&zram->init_lock);
+ zram = dev_to_zram(dev);
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ /* Do not reset an active device or claimed device */
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+ /* From now on, anyone can't open /dev/zram[0-9] */
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /* Make sure all the pending I/O are finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ revalidate_disk(zram->disk);
+ bdput(bdev);
+
+ mutex_lock(&bdev->bd_mutex);
+ zram->claim = false;
+ mutex_unlock(&bdev->bd_mutex);
+
+ return len;
+}
+
+static int zram_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret = 0;
+ struct zram *zram;
+
+ WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+
+ zram = bdev->bd_disk->private_data;
+ /* zram was claimed to reset so open request fails */
+ if (zram->claim)
+ ret = -EBUSY;
return ret;
}
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
+static const struct block_device_operations zram_devops = {
+ .open = zram_open,
+ .swap_slot_free_notify = zram_slot_free_notify,
+ .rw_page = zram_rw_page,
+ .owner = THIS_MODULE
+};
+
+static DEVICE_ATTR_WO(compact);
+static DEVICE_ATTR_RW(disksize);
+static DEVICE_ATTR_RO(initstate);
+static DEVICE_ATTR_WO(reset);
+static DEVICE_ATTR_RO(orig_data_size);
+static DEVICE_ATTR_RO(mem_used_total);
+static DEVICE_ATTR_RW(mem_limit);
+static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_RW(max_comp_streams);
+static DEVICE_ATTR_RW(comp_algorithm);
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1114,6 +1161,7 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_num_writes.attr,
&dev_attr_failed_reads.attr,
&dev_attr_failed_writes.attr,
+ &dev_attr_compact.attr,
&dev_attr_invalid_io.attr,
&dev_attr_notify_free.attr,
&dev_attr_zero_pages.attr,
@@ -1133,10 +1181,24 @@ static struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
-static int create_device(struct zram *zram, int device_id)
+/*
+ * Allocate and initialize new zram device. the function returns
+ * '>= 0' device_id upon success, and negative value otherwise.
+ */
+static int zram_add(void)
{
+ struct zram *zram;
struct request_queue *queue;
- int ret = -ENOMEM;
+ int ret, device_id;
+
+ zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
+ if (!zram)
+ return -ENOMEM;
+
+ ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto out_free_dev;
+ device_id = ret;
init_rwsem(&zram->init_lock);
@@ -1144,12 +1206,13 @@ static int create_device(struct zram *zram, int device_id)
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- goto out;
+ ret = -ENOMEM;
+ goto out_free_idr;
}
blk_queue_make_request(queue, zram_make_request);
- /* gendisk structure */
+ /* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
pr_warn("Error allocating disk structure for device %d\n",
@@ -1207,90 +1270,177 @@ static int create_device(struct zram *zram, int device_id)
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
zram->max_comp_streams = 1;
- return 0;
+
+ pr_info("Added device: %s\n", zram->disk->disk_name);
+ return device_id;
out_free_disk:
del_gendisk(zram->disk);
put_disk(zram->disk);
out_free_queue:
blk_cleanup_queue(queue);
-out:
+out_free_idr:
+ idr_remove(&zram_index_idr, device_id);
+out_free_dev:
+ kfree(zram);
return ret;
}
-static void destroy_devices(unsigned int nr)
+static int zram_remove(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /*
+ * Remove sysfs first, so no one will perform a disksize
+ * store while we destroy the devices. This also helps during
+ * hot_remove -- zram_reset_device() is the last holder of
+ * ->init_lock, no later/concurrent disksize_store() or any
+ * other sysfs handlers are possible.
+ */
+ sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
+ &zram_disk_attr_group);
+
+ /* Make sure all the pending I/O are finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ bdput(bdev);
+
+ pr_info("Removed device: %s\n", zram->disk->disk_name);
+
+ idr_remove(&zram_index_idr, zram->disk->first_minor);
+ blk_cleanup_queue(zram->disk->queue);
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+ kfree(zram);
+ return 0;
+}
+
+/* zram-control sysfs attributes */
+static ssize_t hot_add_show(struct class *class,
+ struct class_attribute *attr,
+ char *buf)
+{
+ int ret;
+
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+
+ if (ret < 0)
+ return ret;
+ return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+}
+
+static ssize_t hot_remove_store(struct class *class,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct zram *zram;
- unsigned int i;
+ int ret, dev_id;
- for (i = 0; i < nr; i++) {
- zram = &zram_devices[i];
- /*
- * Remove sysfs first, so no one will perform a disksize
- * store while we destroy the devices
- */
- sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
- &zram_disk_attr_group);
+ /* dev_id is gendisk->first_minor, which is `int' */
+ ret = kstrtoint(buf, 10, &dev_id);
+ if (ret)
+ return ret;
+ if (dev_id < 0)
+ return -EINVAL;
- zram_reset_device(zram);
+ mutex_lock(&zram_index_mutex);
- blk_cleanup_queue(zram->disk->queue);
- del_gendisk(zram->disk);
- put_disk(zram->disk);
- }
+ zram = idr_find(&zram_index_idr, dev_id);
+ if (zram)
+ ret = zram_remove(zram);
+ else
+ ret = -ENODEV;
+
+ mutex_unlock(&zram_index_mutex);
+ return ret ? ret : count;
+}
+
+static struct class_attribute zram_control_class_attrs[] = {
+ __ATTR_RO(hot_add),
+ __ATTR_WO(hot_remove),
+ __ATTR_NULL,
+};
+
+static struct class zram_control_class = {
+ .name = "zram-control",
+ .owner = THIS_MODULE,
+ .class_attrs = zram_control_class_attrs,
+};
+
+static int zram_remove_cb(int id, void *ptr, void *data)
+{
+ zram_remove(ptr);
+ return 0;
+}
- kfree(zram_devices);
+static void destroy_devices(void)
+{
+ class_unregister(&zram_control_class);
+ idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+ idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
- pr_info("Destroyed %u device(s)\n", nr);
}
static int __init zram_init(void)
{
- int ret, dev_id;
+ int ret;
- if (num_devices > max_num_devices) {
- pr_warn("Invalid value for num_devices: %u\n",
- num_devices);
- return -EINVAL;
+ ret = class_register(&zram_control_class);
+ if (ret) {
+ pr_warn("Unable to register zram-control class\n");
+ return ret;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_warn("Unable to get major number\n");
+ class_unregister(&zram_control_class);
return -EBUSY;
}
- /* Allocate the device array and initialize each one */
- zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
- if (!zram_devices) {
- unregister_blkdev(zram_major, "zram");
- return -ENOMEM;
- }
-
- for (dev_id = 0; dev_id < num_devices; dev_id++) {
- ret = create_device(&zram_devices[dev_id], dev_id);
- if (ret)
+ while (num_devices != 0) {
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+ if (ret < 0)
goto out_error;
+ num_devices--;
}
- pr_info("Created %u device(s)\n", num_devices);
return 0;
out_error:
- destroy_devices(dev_id);
+ destroy_devices();
return ret;
}
static void __exit zram_exit(void)
{
- destroy_devices(num_devices);
+ destroy_devices();
}
module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
+MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 570c598f4ce9..6dbe2df506bf 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -20,12 +20,6 @@
#include "zcomp.h"
-/*
- * Some arbitrary value. This is just to catch
- * invalid value for num_devices module parameter.
- */
-static const unsigned max_num_devices = 32;
-
/*-- Configurable parameters */
/*
@@ -121,5 +115,9 @@ struct zram {
*/
u64 disksize; /* bytes */
char compressor[10];
+ /*
+ * zram is claimed so open request will be failed
+ */
+ bool claim; /* Protected by bdev->bd_mutex */
};
#endif