27 files changed, 1614 insertions, 1944 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index eb1fed5bd516..3ccef9eba6f9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX
 
 config BLK_DEV_PMEM
 	tristate "Persistent memory block device support"
+	depends on HAS_IOMEM
 	help
 	  Saying Y here will allow you to use a contiguous range of reserved
 	  memory as one or more persistent block devices.
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ff20f192b0f6..0422c47261c3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -139,8 +139,6 @@ static struct board_type products[] = {
 	{0x3214103C, "Smart Array E200i", &SA5_access},
 	{0x3215103C, "Smart Array E200i", &SA5_access},
 	{0x3237103C, "Smart Array E500", &SA5_access},
-	{0x3223103C, "Smart Array P800", &SA5_access},
-	{0x3234103C, "Smart Array P400", &SA5_access},
 	{0x323D103C, "Smart Array P700m", &SA5_access},
 };
 
@@ -574,8 +572,6 @@ static void cciss_procinit(ctlr_info_t *h)
 
 /* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
-	0x324a103C, /* Smart Array P712m */
-	0x324b103C, /* SmartArray P711m */
 	0x3223103C, /* Smart Array P800 */
 	0x3234103C, /* Smart Array P400 */
 	0x3235103C, /* Smart Array P400i */
@@ -586,12 +582,32 @@ static u32 unresettable_controller[] = {
 	0x3215103C, /* Smart Array E200i */
 	0x3237103C, /* Smart Array E500 */
 	0x323D103C, /* Smart Array P700m */
+	0x40800E11, /* Smart Array 5i */
 	0x409C0E11, /* Smart Array 6400 */
 	0x409D0E11, /* Smart Array 6400 EM */
+	0x40700E11, /* Smart Array 5300 */
+	0x40820E11, /* Smart Array 532 */
+	0x40830E11, /* Smart Array 5312 */
+	0x409A0E11, /* Smart Array 641 */
+	0x409B0E11, /* Smart Array 642 */
+	0x40910E11, /* Smart Array 6i */
 };
 
 /* List of controllers which cannot even be soft reset */
 static u32 soft_unresettable_controller[] = {
+	0x40800E11, /* Smart Array 5i */
+	0x40700E11, /* Smart Array 5300 */
+	0x40820E11, /* Smart Array 532 */
+	0x40830E11, /* Smart Array 5312 */
+	0x409A0E11, /* Smart Array 641 */
+	0x409B0E11, /* Smart Array 642 */
+	0x40910E11, /* Smart Array 6i */
+	/* Exclude 640x boards.  These are two pci devices in one slot
+	 * which share a battery backed cache module.  One controls the
+	 * cache, the other accesses the cache through the one that controls
+	 * it.  If we reset the one controlling the cache, the other will
+	 * likely not be happy.  Just forbid resetting this conjoined mess.
+	 */
 	0x409C0E11, /* Smart Array 6400 */
 	0x409D0E11, /* Smart Array 6400 EM */
 };
@@ -4667,8 +4683,7 @@ static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 	 */
 	cciss_lookup_board_id(pdev, &board_id);
 	if (!ctlr_is_resettable(board_id)) {
-		dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
-				"due to shared cache module.");
+		dev_warn(&pdev->dev, "Controller not resettable\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index ecd845cd28d8..1537302e56e3 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
 	.show_info		= cciss_scsi_show_info,
 	.queuecommand		= cciss_scsi_queue_command,
 	.this_id		= 7,
-	.cmd_per_lun		= 1,
 	.use_clustering		= DISABLE_CLUSTERING,
 	/* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
 	.eh_device_reset_handler= cciss_eh_device_reset_handler,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b905e9888b88..efd19c2da9c2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 #include <linux/major.h>
 #include <linux/blkdev.h>
+#include <linux/backing-dev.h>
 #include <linux/genhd.h>
 #include <linux/idr.h>
 #include <net/tcp.h>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 81fde9ef7f8e..a1518539b858 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2359,7 +2359,7 @@ static void drbd_cleanup(void)
  * @congested_data:	User data
  * @bdi_bits:		Bits the BDI flusher thread is currently interested in
  *
- * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ * Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
  */
 static int drbd_congested(void *congested_data, int bdi_bits)
 {
@@ -2376,14 +2376,14 @@ static int drbd_congested(void *congested_data, int bdi_bits)
 	}
 
 	if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
-		r |= (1 << BDI_async_congested);
+		r |= (1 << WB_async_congested);
 		/* Without good local data, we would need to read from remote,
 		 * and that would need the worker thread as well, which is
 		 * currently blocked waiting for that usermode helper to
 		 * finish.
 		 */
 		if (!get_ldev_if_state(device, D_UP_TO_DATE))
-			r |= (1 << BDI_sync_congested);
+			r |= (1 << WB_sync_congested);
 		else
 			put_ldev(device);
 		r &= bdi_bits;
@@ -2399,9 +2399,9 @@ static int drbd_congested(void *congested_data, int bdi_bits)
 			reason = 'b';
 	}
 
-	if (bdi_bits & (1 << BDI_async_congested) &&
+	if (bdi_bits & (1 << WB_async_congested) &&
 	    test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
-		r |= (1 << BDI_async_congested);
+		r |= (1 << WB_async_congested);
 		reason = reason == 'b' ? 'a' : 'n';
 	}
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index cee20354ac37..c097909c589c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
 	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
 
 	what = "sock_create_kern";
-	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
+	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (err < 0) {
 		sock = NULL;
@@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
 	memcpy(&my_addr, &connection->my_addr, my_addr_len);
 
 	what = "sock_create_kern";
-	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
+	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
 			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
 	if (err) {
 		s_listen = NULL;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ae3fcb4199e9..40580dc7f41c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex);
 static int max_part;
 static int part_shift;
 
-static struct workqueue_struct *loop_wq;
-
 static int transfer_xor(struct loop_device *lo, int cmd,
 			struct page *raw_page, unsigned raw_off,
 			struct page *loop_page, unsigned loop_off,
@@ -476,6 +474,28 @@ static int loop_flush(struct loop_device *lo)
 	return loop_switch(lo, NULL);
 }
 
+static void loop_reread_partitions(struct loop_device *lo,
+				   struct block_device *bdev)
+{
+	int rc;
+
+	/*
+	 * bd_mutex has been held already in release path, so don't
+	 * acquire it if this function is called in such case.
+	 *
+	 * If the reread partition isn't from release path, lo_refcnt
+	 * must be at least one and it can only become zero when the
+	 * current holder is released.
+	 */
+	if (!atomic_read(&lo->lo_refcnt))
+		rc = __blkdev_reread_part(bdev);
+	else
+		rc = blkdev_reread_part(bdev);
+	if (rc)
+		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
+			__func__, lo->lo_number, lo->lo_file_name, rc);
+}
+
 /*
  * loop_change_fd switched the backing store of a loopback device to
  * a new file. This is useful for operating system installers to free up
@@ -524,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	return 0;
 
  out_putf:
@@ -725,6 +745,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	size = get_loop_size(lo, file);
 	if ((loff_t)(sector_t)size != size)
 		goto out_putf;
+	error = -ENOMEM;
+	lo->wq = alloc_workqueue("kloopd%d",
+			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
+			lo->lo_number);
+	if (!lo->wq)
+		goto out_putf;
 
 	error = 0;
 
@@ -755,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 
 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -827,7 +853,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
 	 * command to fail with EBUSY.
 	 */
-	if (lo->lo_refcnt > 1) {
+	if (atomic_read(&lo->lo_refcnt) > 1) {
 		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
 		mutex_unlock(&lo->lo_ctl_mutex);
 		return 0;
@@ -836,6 +862,9 @@ static int loop_clr_fd(struct loop_device *lo)
 	if (filp == NULL)
 		return -EINVAL;
 
+	/* freeze request queue during the transition */
+	blk_mq_freeze_queue(lo->lo_queue);
+
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
 	lo->lo_backing_file = NULL;
@@ -867,11 +896,15 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_unbound;
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
+	blk_mq_unfreeze_queue(lo->lo_queue);
+
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+	destroy_workqueue(lo->wq);
+	lo->wq = NULL;
 	mutex_unlock(&lo->lo_ctl_mutex);
 	/*
 	 * Need not hold lo_ctl_mutex to fput backing file.
@@ -943,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+		loop_reread_partitions(lo, lo->lo_device);
 	}
 
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
@@ -1324,9 +1357,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
 		goto out;
 	}
 
-	mutex_lock(&lo->lo_ctl_mutex);
-	lo->lo_refcnt++;
-	mutex_unlock(&lo->lo_ctl_mutex);
+	atomic_inc(&lo->lo_refcnt);
 out:
 	mutex_unlock(&loop_index_mutex);
 	return err;
@@ -1337,11 +1368,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 	struct loop_device *lo = disk->private_data;
 	int err;
 
-	mutex_lock(&lo->lo_ctl_mutex);
-
-	if (--lo->lo_refcnt)
-		goto out;
+	if (atomic_dec_return(&lo->lo_refcnt))
+		return;
 
+	mutex_lock(&lo->lo_ctl_mutex);
 	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
 		/*
 		 * In autoclear mode, stop the loop thread
@@ -1358,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		loop_flush(lo);
 	}
 
-out:
 	mutex_unlock(&lo->lo_ctl_mutex);
 }
 
@@ -1425,9 +1454,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	struct loop_device *lo = cmd->rq->q->queuedata;
 
 	blk_mq_start_request(bd->rq);
 
+	if (lo->lo_state != Lo_bound)
+		return -EIO;
+
 	if (cmd->rq->cmd_flags & REQ_WRITE) {
 		struct loop_device *lo = cmd->rq->q->queuedata;
 		bool need_sched = true;
@@ -1441,9 +1474,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		spin_unlock_irq(&lo->lo_lock);
 
 		if (need_sched)
-			queue_work(loop_wq, &lo->write_work);
+			queue_work(lo->wq, &lo->write_work);
 	} else {
-		queue_work(loop_wq, &cmd->read_work);
+		queue_work(lo->wq, &cmd->read_work);
 	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
@@ -1455,9 +1488,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	struct loop_device *lo = cmd->rq->q->queuedata;
 	int ret = -EIO;
 
-	if (lo->lo_state != Lo_bound)
-		goto failed;
-
 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto failed;
 
@@ -1594,6 +1624,7 @@ static int loop_add(struct loop_device **l, int i)
 		disk->flags |= GENHD_FL_NO_PART_SCAN;
 	disk->flags |= GENHD_FL_EXT_DEVT;
 	mutex_init(&lo->lo_ctl_mutex);
+	atomic_set(&lo->lo_refcnt, 0);
 	lo->lo_number		= i;
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
@@ -1620,8 +1651,8 @@ out:
 
 static void loop_remove(struct loop_device *lo)
 {
-	del_gendisk(lo->lo_disk);
 	blk_cleanup_queue(lo->lo_queue);
+	del_gendisk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
 	put_disk(lo->lo_disk);
 	kfree(lo);
@@ -1711,7 +1742,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
 		}
-		if (lo->lo_refcnt > 0) {
+		if (atomic_read(&lo->lo_refcnt) > 0) {
 			ret = -EBUSY;
 			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
@@ -1806,13 +1837,6 @@ static int __init loop_init(void)
 		goto misc_out;
 	}
 
-	loop_wq = alloc_workqueue("kloopd",
-			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
-	if (!loop_wq) {
-		err = -ENOMEM;
-		goto misc_out;
-	}
-
 	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
 				  THIS_MODULE, loop_probe, NULL, NULL);
 
@@ -1850,8 +1874,6 @@ static void __exit loop_exit(void)
 	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
 	unregister_blkdev(LOOP_MAJOR, "loop");
 
-	destroy_workqueue(loop_wq);
-
 	misc_deregister(&loop_misc);
 }
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f8323f..25e8997ed246 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -28,7 +28,7 @@ struct loop_func_table;
 
 struct loop_device {
 	int		lo_number;
-	int		lo_refcnt;
+	atomic_t	lo_refcnt;
 	loff_t		lo_offset;
 	loff_t		lo_sizelimit;
 	int		lo_flags;
@@ -54,6 +54,7 @@ struct loop_device {
 	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
+	struct workqueue_struct *wq;
 	struct list_head	write_cmd_head;
 	struct work_struct	write_work;
 	bool			write_started;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3bd7ca9853a8..4a2ef09e6704 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
 		else
 			dev_warn(&dd->pdev->dev,
 				"%s: dd->queue is NULL\n", __func__);
-		if (dd->port) {
-			set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
-			wake_up_interruptible(&dd->port->svc_wait);
-		} else
-			dev_warn(&dd->pdev->dev,
-				"%s: dd->port is NULL\n", __func__);
 		return true; /* device removed */
 	}
 
@@ -269,8 +263,11 @@ static int mtip_hba_reset(struct driver_data *dd)
 	/* Flush */
 	readl(dd->mmio + HOST_CTL);
 
-	/* Spin for up to 2 seconds, waiting for reset acknowledgement */
-	timeout = jiffies + msecs_to_jiffies(2000);
+	/*
+	 * Spin for up to 10 seconds waiting for reset acknowledgement. Spec
+	 * is 1 sec but in LUN failure conditions, up to 10 secs are required
+	 */
+	timeout = jiffies + msecs_to_jiffies(10000);
 	do {
 		mdelay(10);
 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
@@ -623,8 +620,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 
 	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
 
-	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
-			test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
+	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
 
@@ -896,6 +892,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
 
 		/* Acknowledge the interrupt status on the port.*/
 		port_stat = readl(port->mmio + PORT_IRQ_STAT);
+		if (unlikely(port_stat == 0xFFFFFFFF)) {
+			mtip_check_surprise_removal(dd->pdev);
+			return IRQ_HANDLED;
+		}
 		writel(port_stat, port->mmio + PORT_IRQ_STAT);
 
 		/* Demux port status */
@@ -991,15 +991,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 	reply = port->rxfis + RX_FIS_D2H_REG;
 	task_file_data = readl(port->mmio+PORT_TFDATA);
 
-	if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
-		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
-
 	if ((task_file_data & 1))
 		return false;
 
 	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
-		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
-		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
 		port->ic_pause_timer = jiffies;
 		return true;
 	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1011,8 +1006,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 		((fis->command == 0xFC) &&
 			(fis->features == 0x27 || fis->features == 0x72 ||
 			 fis->features == 0x62 || fis->features == 0x26))) {
+		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
 		/* Com reset after secure erase or lowlevel format */
 		mtip_restart_port(port);
+		clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
 		return false;
 	}
 
@@ -1112,9 +1109,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	int_cmd = mtip_get_int_command(dd);
 
 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
-	port->ic_pause_timer = 0;
 
-	clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+	if (fis->command == ATA_CMD_SEC_ERASE_PREP)
+		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+
 	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
 
 	if (atomic == GFP_KERNEL) {
@@ -1251,11 +1249,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 exec_ic_exit:
 	/* Clear the allocated and active bits for the internal command. */
 	mtip_put_int_command(dd, int_cmd);
+	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
 		/* NCQ paused */
 		return rv;
 	}
-	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 	wake_up_interruptible(&port->svc_wait);
 
 	return rv;
@@ -2625,18 +2623,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
 				readl(dd->mmio + HOST_IRQ_STAT));
 	size += sprintf(&buf[size], "\n");
 
-	size += sprintf(&buf[size], "L/ Allocated     : [ 0x");
-
-	for (n = dd->slot_groups-1; n >= 0; n--) {
-		if (sizeof(long) > sizeof(u32))
-			group_allocated =
-				dd->port->allocated[n/2] >> (32*(n&1));
-		else
-			group_allocated = dd->port->allocated[n];
-		size += sprintf(&buf[size], "%08X ", group_allocated);
-	}
-	size += sprintf(&buf[size], "]\n");
-
 	size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
 
 	for (n = dd->slot_groups-1; n >= 0; n--) {
@@ -2780,48 +2766,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
 		debugfs_remove_recursive(dd->dfs_node);
 }
 
-static int mtip_free_orphan(struct driver_data *dd)
-{
-	struct kobject *kobj;
-
-	if (dd->bdev) {
-		if (dd->bdev->bd_holders >= 1)
-			return -2;
-
-		bdput(dd->bdev);
-		dd->bdev = NULL;
-	}
-
-	mtip_hw_debugfs_exit(dd);
-
-	spin_lock(&rssd_index_lock);
-	ida_remove(&rssd_index_ida, dd->index);
-	spin_unlock(&rssd_index_lock);
-
-	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
-			test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
-		put_disk(dd->disk);
-	} else {
-		if (dd->disk) {
-			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
-			if (kobj) {
-				mtip_hw_sysfs_exit(dd, kobj);
-				kobject_put(kobj);
-			}
-			del_gendisk(dd->disk);
-			dd->disk = NULL;
-		}
-		if (dd->queue) {
-			dd->queue->queuedata = NULL;
-			blk_cleanup_queue(dd->queue);
-			blk_mq_free_tag_set(&dd->tags);
-			dd->queue = NULL;
-		}
-	}
-	kfree(dd);
-	return 0;
-}
-
 /*
  * Perform any init/resume time hardware setup
  *
@@ -2944,7 +2888,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
 			mtip_block_initialize(dd);
 			return 0;
 		}
-		ssleep(10);
 	} while (time_before(jiffies, timeout));
 
 	/* Check for timeout */
@@ -2969,7 +2912,6 @@ static int mtip_service_thread(void *data)
 	unsigned long slot, slot_start, slot_wrap;
 	unsigned int num_cmd_slots = dd->slot_groups * 32;
 	struct mtip_port *port = dd->port;
-	int ret;
 
 	while (1) {
 		if (kthread_should_stop() ||
@@ -2990,10 +2932,6 @@ static int mtip_service_thread(void *data)
 			test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
 			goto st_out;
 
-		/* If I am an orphan, start self cleanup */
-		if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
-			break;
-
 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
 				&dd->dd_flag)))
 			goto st_out;
@@ -3047,26 +2985,6 @@ restart_eh:
 		}
 	}
 
-	/* wait for pci remove to exit */
-	while (1) {
-		if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
-			break;
-		msleep_interruptible(1000);
-		if (kthread_should_stop())
-			goto st_out;
-	}
-
-	while (1) {
-		ret = mtip_free_orphan(dd);
-		if (!ret) {
-			/* NOTE: All data structures are invalid, do not
-			 * access any here */
-			return 0;
-		}
-		msleep_interruptible(1000);
-		if (kthread_should_stop())
-			goto st_out;
-	}
 st_out:
 	return 0;
 }
@@ -3394,6 +3312,7 @@ static int mtip_hw_exit(struct driver_data *dd)
 	/* Release the IRQ. */
 	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
+	msleep(1000);
 
 	/* Free dma regions */
 	mtip_dma_free(dd);
@@ -3699,6 +3618,26 @@ static const struct block_device_operations mtip_block_ops = {
 	.owner		= THIS_MODULE
 };
 
+static inline bool is_se_active(struct driver_data *dd)
+{
+	if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) {
+		if (dd->port->ic_pause_timer) {
+			unsigned long to = dd->port->ic_pause_timer +
+							msecs_to_jiffies(1000);
+			if (time_after(jiffies, to)) {
+				clear_bit(MTIP_PF_SE_ACTIVE_BIT,
+							&dd->port->flags);
+				clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
+				dd->port->ic_pause_timer = 0;
+				wake_up_interruptible(&dd->port->svc_wait);
+				return false;
+			}
+		}
+		return true;
+	}
+	return false;
+}
+
 /*
  * Block layer make request function.
  *
@@ -3716,6 +3655,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	unsigned int nents;
 
+	if (is_se_active(dd))
+		return -ENODATA;
+
 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
 							&dd->dd_flag))) {
@@ -3900,7 +3842,8 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 	dd->disk->driverfs_dev	= &dd->pdev->dev;
 	dd->disk->major		= dd->major;
-	dd->disk->first_minor	= dd->instance * MTIP_MAX_MINORS;
+	dd->disk->first_minor	= index * MTIP_MAX_MINORS;
+	dd->disk->minors 	= MTIP_MAX_MINORS;
 	dd->disk->fops		= &mtip_block_ops;
 	dd->disk->private_data	= dd;
 	dd->index		= index;
@@ -4066,52 +4009,51 @@ static int mtip_block_remove(struct driver_data *dd)
 {
 	struct kobject *kobj;
 
-	if (!dd->sr) {
-		mtip_hw_debugfs_exit(dd);
+	mtip_hw_debugfs_exit(dd);
 
-		if (dd->mtip_svc_handler) {
-			set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
-			wake_up_interruptible(&dd->port->svc_wait);
-			kthread_stop(dd->mtip_svc_handler);
-		}
+	if (dd->mtip_svc_handler) {
+		set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
+		wake_up_interruptible(&dd->port->svc_wait);
+		kthread_stop(dd->mtip_svc_handler);
+	}
 
-		/* Clean up the sysfs attributes, if created */
-		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
-			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
-			if (kobj) {
-				mtip_hw_sysfs_exit(dd, kobj);
-				kobject_put(kobj);
-			}
+	/* Clean up the sysfs attributes, if created */
+	if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
+		kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
+		if (kobj) {
+			mtip_hw_sysfs_exit(dd, kobj);
+			kobject_put(kobj);
 		}
+	}
 
+	if (!dd->sr)
 		mtip_standby_drive(dd);
-
-		/*
-		 * Delete our gendisk structure. This also removes the device
-		 * from /dev
-		 */
-		if (dd->bdev) {
-			bdput(dd->bdev);
-			dd->bdev = NULL;
-		}
-		if (dd->disk) {
-			if (dd->disk->queue) {
-				del_gendisk(dd->disk);
-				blk_cleanup_queue(dd->queue);
-				blk_mq_free_tag_set(&dd->tags);
-				dd->queue = NULL;
-			} else
-				put_disk(dd->disk);
-		}
-		dd->disk  = NULL;
-
-		spin_lock(&rssd_index_lock);
-		ida_remove(&rssd_index_ida, dd->index);
-		spin_unlock(&rssd_index_lock);
-	} else {
+	else
 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 						dd->disk->disk_name);
+
+	/*
+	 * Delete our gendisk structure. This also removes the device
+	 * from /dev
+	 */
+	if (dd->bdev) {
+		bdput(dd->bdev);
+		dd->bdev = NULL;
 	}
+	if (dd->disk) {
+		del_gendisk(dd->disk);
+		if (dd->disk->queue) {
+			blk_cleanup_queue(dd->queue);
+			blk_mq_free_tag_set(&dd->tags);
+			dd->queue = NULL;
+		}
+		put_disk(dd->disk);
+	}
+	dd->disk  = NULL;
+
+	spin_lock(&rssd_index_lock);
+	ida_remove(&rssd_index_ida, dd->index);
+	spin_unlock(&rssd_index_lock);
 
 	/* De-initialize the protocol layer. */
 	mtip_hw_exit(dd);
@@ -4140,12 +4082,12 @@ static int mtip_block_shutdown(struct driver_data *dd)
 		dev_info(&dd->pdev->dev,
 			"Shutting down %s ...\n", dd->disk->disk_name);
 
+		del_gendisk(dd->disk);
 		if (dd->disk->queue) {
-			del_gendisk(dd->disk);
 			blk_cleanup_queue(dd->queue);
 			blk_mq_free_tag_set(&dd->tags);
-		} else
-			put_disk(dd->disk);
+		}
+		put_disk(dd->disk);
 		dd->disk  = NULL;
 		dd->queue = NULL;
 	}
@@ -4507,6 +4449,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 			"Completion workers still active!\n");
 	}
 
+	blk_mq_stop_hw_queues(dd->queue);
 	/* Clean up the block layer. */
 	mtip_block_remove(dd);
 
@@ -4524,10 +4467,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	list_del_init(&dd->remove_list);
 	spin_unlock_irqrestore(&dev_lock, flags);
 
-	if (!dd->sr)
-		kfree(dd);
-	else
-		set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
+	kfree(dd);
 
 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 	pci_set_drvdata(pdev, NULL);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index ba1b31ee22ec..3274784008eb 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -142,7 +142,6 @@ enum {
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,
 	MTIP_PF_REBUILD_BIT         = 6,
-	MTIP_PF_SR_CLEANUP_BIT      = 7,
 	MTIP_PF_SVC_THD_STOP_BIT    = 8,
 
 	/* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -150,7 +149,6 @@ enum {
 	MTIP_DDF_REMOVE_PENDING_BIT = 1,
 	MTIP_DDF_OVER_TEMP_BIT      = 2,
 	MTIP_DDF_WRITE_PROTECT_BIT  = 3,
-	MTIP_DDF_REMOVE_DONE_BIT    = 4,
 	MTIP_DDF_CLEANUP_BIT        = 5,
 	MTIP_DDF_RESUME_BIT         = 6,
 	MTIP_DDF_INIT_DONE_BIT      = 7,
@@ -412,19 +410,13 @@ struct mtip_port {
 	 * by the DMA when the driver issues internal commands.
 	 */
 	dma_addr_t sector_buffer_dma;
-	/*
-	 * Bit significant, used to determine if a command slot has
-	 * been allocated. i.e. the slot is in use.  Bits are cleared
-	 * when the command slot and all associated data structures
-	 * are no longer needed.
-	 */
+
 	u16 *log_buf;
 	dma_addr_t log_buf_dma;
 
 	u8 *smart_buf;
 	dma_addr_t smart_buf_dma;
 
-	unsigned long allocated[SLOTBITS_IN_LONGS];
 	/*
 	 * used to queue commands when an internal command is in progress
 	 * or error handling is active
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 39e5f7fae3ef..0e385d8e9b86 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 	int result, flags;
 	struct nbd_request request;
 	unsigned long size = blk_rq_bytes(req);
+	u32 type;
+
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+		type = NBD_CMD_DISC;
+	else if (req->cmd_flags & REQ_DISCARD)
+		type = NBD_CMD_TRIM;
+	else if (req->cmd_flags & REQ_FLUSH)
+		type = NBD_CMD_FLUSH;
+	else if (rq_data_dir(req) == WRITE)
+		type = NBD_CMD_WRITE;
+	else
+		type = NBD_CMD_READ;
 
 	memset(&request, 0, sizeof(request));
 	request.magic = htonl(NBD_REQUEST_MAGIC);
-	request.type = htonl(nbd_cmd(req));
-
-	if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+	request.type = htonl(type);
+	if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
 	}
 	memcpy(request.handle, &req, sizeof(req));
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
-		req, nbdcmd_to_ascii(nbd_cmd(req)),
+		req, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, 1, &request, sizeof(request),
-			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
 		return -EIO;
 	}
 
-	if (nbd_cmd(req) == NBD_CMD_WRITE) {
+	if (type == NBD_CMD_WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
 		/*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 	}
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
-	if (nbd_cmd(req) == NBD_CMD_READ) {
+	if (rq_data_dir(req) != WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
 
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 	if (req->cmd_type != REQ_TYPE_FS)
 		goto error_out;
 
-	nbd_cmd(req) = NBD_CMD_READ;
-	if (rq_data_dir(req) == WRITE) {
-		if ((req->cmd_flags & REQ_DISCARD)) {
-			WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
-			nbd_cmd(req) = NBD_CMD_TRIM;
-		} else
-			nbd_cmd(req) = NBD_CMD_WRITE;
-		if (nbd->flags & NBD_FLAG_READ_ONLY) {
-			dev_err(disk_to_dev(nbd->disk),
-				"Write on read-only\n");
-			goto error_out;
-		}
-	}
-
-	if (req->cmd_flags & REQ_FLUSH) {
-		BUG_ON(unlikely(blk_rq_sectors(req)));
-		nbd_cmd(req) = NBD_CMD_FLUSH;
+	if (rq_data_dir(req) == WRITE &&
+	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
+		dev_err(disk_to_dev(nbd->disk),
+			"Write on read-only\n");
+		goto error_out;
 	}
 
 	req->errors = 0;
@@ -592,8 +591,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		fsync_bdev(bdev);
 		mutex_lock(&nbd->tx_lock);
 		blk_rq_init(NULL, &sreq);
-		sreq.cmd_type = REQ_TYPE_SPECIAL;
-		nbd_cmd(&sreq) = NBD_CMD_DISC;
+		sreq.cmd_type = REQ_TYPE_DRV_PRIV;
 
 		/* Check again after getting mutex back.  */
 		if (!nbd->sock)
@@ -713,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		bdev->bd_inode->i_size = 0;
 		set_capacity(nbd->disk, 0);
 		if (max_part > 0)
-			ioctl_by_bdev(bdev, BLKRRPART, 0);
+			blkdev_reread_part(bdev);
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			return 0;
 		return nbd->harderror;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 65cd61a4145e..6f9b7534928e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 			cmd = container_of(entry, struct nullb_cmd, ll_list);
 			entry = entry->next;
 			end_cmd(cmd);
+
+			if (cmd->rq) {
+				struct request_queue *q = cmd->rq->q;
+
+				if (!q->mq_ops && blk_queue_stopped(q)) {
+					spin_lock(q->queue_lock);
+					if (blk_queue_stopped(q))
+						blk_start_queue(q);
+					spin_unlock(q->queue_lock);
+				}
+			}
 		} while (entry);
 	}
 
@@ -257,7 +268,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 	if (llist_add(&cmd->ll_list, &cq->list)) {
 		ktime_t kt = ktime_set(0, completion_nsec);
 
-		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
 	}
 
 	put_cpu();
@@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req)
 		req->special = cmd;
 		return BLKPREP_OK;
 	}
+	blk_stop_queue(q);
 
 	return BLKPREP_DEFER;
 }
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 85b8036deaa3..e5112714188f 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -80,6 +81,7 @@ static wait_queue_head_t nvme_kthread_wait;
 static struct class *nvme_class;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
+static int nvme_reset(struct nvme_dev *dev);
 static int nvme_process_cq(struct nvme_queue *nvmeq);
 
 struct async_cmd_info {
@@ -102,6 +104,7 @@ struct nvme_queue {
 	spinlock_t q_lock;
 	struct nvme_command *sq_cmds;
 	volatile struct nvme_completion *cqes;
+	struct blk_mq_tags **tags;
 	dma_addr_t sq_dma_addr;
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
@@ -114,7 +117,6 @@ struct nvme_queue {
 	u8 cq_phase;
 	u8 cqe_seen;
 	struct async_cmd_info cmdinfo;
-	struct blk_mq_hw_ctx *hctx;
 };
 
 /*
@@ -182,9 +184,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_dev *dev = data;
 	struct nvme_queue *nvmeq = dev->queues[0];
 
-	WARN_ON(nvmeq->hctx);
-	nvmeq->hctx = hctx;
+	WARN_ON(hctx_idx != 0);
+	WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
+	WARN_ON(nvmeq->tags);
+
 	hctx->driver_data = nvmeq;
+	nvmeq->tags = &dev->admin_tagset.tags[0];
 	return 0;
 }
 
@@ -201,27 +206,16 @@ static int nvme_admin_init_request(void *data, struct request *req,
 	return 0;
 }
 
-static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
-	struct nvme_queue *nvmeq = hctx->driver_data;
-
-	nvmeq->hctx = NULL;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int hctx_idx)
 {
 	struct nvme_dev *dev = data;
-	struct nvme_queue *nvmeq = dev->queues[
-					(hctx_idx % dev->queue_count) + 1];
-
-	if (!nvmeq->hctx)
-		nvmeq->hctx = hctx;
+	struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
 
-	/* nvmeq queues are shared between namespaces. We assume here that
-	 * blk-mq map the tags so they match up with the nvme queue tags. */
-	WARN_ON(nvmeq->hctx->tags != hctx->tags);
+	if (!nvmeq->tags)
+		nvmeq->tags = &dev->tagset.tags[hctx_idx];
 
+	WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
 	hctx->driver_data = nvmeq;
 	return 0;
 }
@@ -307,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
 		++nvmeq->dev->event_limit;
-	if (status == NVME_SC_SUCCESS)
-		dev_warn(nvmeq->q_dmadev,
-			"async event result %08x\n", result);
+	if (status != NVME_SC_SUCCESS)
+		return;
+
+	switch (result & 0xff07) {
+	case NVME_AER_NOTICE_NS_CHANGED:
+		dev_info(nvmeq->q_dmadev, "rescanning\n");
+		schedule_work(&nvmeq->dev->scan_work);
+	default:
+		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+	}
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -320,7 +321,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 	u32 result = le32_to_cpup(&cqe->result);
 
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
+	blk_mq_free_request(req);
 
 	dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
 	++nvmeq->dev->abort_limit;
@@ -333,14 +334,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx,
 	cmdinfo->result = le32_to_cpup(&cqe->result);
 	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
 	queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
-	blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req);
+	blk_mq_free_request(cmdinfo->req);
 }
 
 static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
 				  unsigned int tag)
 {
-	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-	struct request *req = blk_mq_tag_to_rq(hctx->tags, tag);
+	struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
 
 	return blk_mq_rq_to_pdu(req);
 }
@@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 				(unsigned long) rq, gfp);
 }
 
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
 	const int last_prp = dev->page_size / 8 - 1;
 	int i;
@@ -605,22 +605,30 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
-		req->errors = nvme_error_status(status);
+		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+			req->errors = status;
+		} else {
+			req->errors = nvme_error_status(status);
+		}
 	} else
 		req->errors = 0;
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+		u32 result = le32_to_cpup(&cqe->result);
+		req->special = (void *)(uintptr_t)result;
+	}
 
 	if (cmd_rq->aborted)
-		dev_warn(&nvmeq->dev->pci_dev->dev,
+		dev_warn(nvmeq->dev->dev,
 			"completing aborted command with status:%04x\n",
 			status);
 
 	if (iod->nents) {
-		dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
+		dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
 			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		if (blk_integrity_rq(req)) {
 			if (!rq_data_dir(req))
 				nvme_dif_remap(req, nvme_dif_complete);
-			dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+			dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
 				rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		}
 	}
@@ -630,8 +638,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
-								gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+		int total_len, gfp_t gfp)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -709,6 +717,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	return total_len;
 }
 
+static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
+		struct nvme_iod *iod)
+{
+	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+
+	memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
+	cmnd->rw.command_id = req->tag;
+	if (req->nr_phys_segments) {
+		cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+	}
+
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
 /*
  * We reuse the small pool to allocate the 16-byte range here as it is not
  * worth having a special pool for these or additional cases to handle freeing
@@ -807,11 +832,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	return 0;
 }
 
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
 static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_queue *nvmeq = hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_iod *iod;
@@ -822,15 +851,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * unless this namespace is formated such that the metadata can be
 	 * stripped/generated by the controller with PRACT=1.
 	 */
-	if (ns->ms && !blk_integrity_rq(req)) {
-		if (!(ns->pi_type && ns->ms == 8)) {
+	if (ns && ns->ms && !blk_integrity_rq(req)) {
+		if (!(ns->pi_type && ns->ms == 8) &&
+					req->cmd_type != REQ_TYPE_DRV_PRIV) {
 			req->errors = -EFAULT;
 			blk_mq_complete_request(req);
 			return BLK_MQ_RQ_QUEUE_OK;
 		}
 	}
 
-	iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
+	iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
 	if (!iod)
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
@@ -841,8 +871,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 * as it is not worth having a special pool for these or
 		 * additional cases to handle freeing the iod.
 		 */
-		range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
-						GFP_ATOMIC,
+		range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
 						&iod->first_dma);
 		if (!range)
 			goto retry_cmd;
@@ -860,9 +889,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto retry_cmd;
 
 		if (blk_rq_bytes(req) !=
-                    nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
-			dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg,
-					iod->nents, dma_dir);
+                    nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 			goto retry_cmd;
 		}
 		if (blk_integrity_rq(req)) {
@@ -884,7 +912,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
-	if (req->cmd_flags & REQ_DISCARD)
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+		nvme_submit_priv(nvmeq, req, iod);
+	else if (req->cmd_flags & REQ_DISCARD)
 		nvme_submit_discard(nvmeq, ns, req, iod);
 	else if (req->cmd_flags & REQ_FLUSH)
 		nvme_submit_flush(nvmeq, ns, req->tag);
@@ -896,10 +926,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 
  error_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_ERROR;
  retry_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
@@ -942,15 +972,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
 	return 1;
 }
 
-/* Admin queue isn't initialized as a request queue. If at some point this
- * happens anyway, make sure to notify the user */
-static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
-			       const struct blk_mq_queue_data *bd)
-{
-	WARN_ON_ONCE(1);
-	return BLK_MQ_RQ_QUEUE_ERROR;
-}
-
 static irqreturn_t nvme_irq(int irq, void *data)
 {
 	irqreturn_t result;
@@ -972,46 +993,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 	return IRQ_WAKE_THREAD;
 }
 
-struct sync_cmd_info {
-	struct task_struct *task;
-	u32 result;
-	int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
-						struct nvme_completion *cqe)
-{
-	struct sync_cmd_info *cmdinfo = ctx;
-	cmdinfo->result = le32_to_cpup(&cqe->result);
-	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
-	wake_up_process(cmdinfo->task);
-}
-
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
-						u32 *result, unsigned timeout)
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout)
 {
-	struct sync_cmd_info cmdinfo;
-	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+	bool write = cmd->common.opcode & 1;
+	struct bio *bio = NULL;
+	struct request *req;
+	int ret;
 
-	cmdinfo.task = current;
-	cmdinfo.status = -EINTR;
+	req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
-	cmd->common.command_id = req->tag;
+	req->cmd_type = REQ_TYPE_DRV_PRIV;
+	req->cmd_flags |= REQ_FAILFAST_DRIVER;
+	req->__data_len = 0;
+	req->__sector = (sector_t) -1;
+	req->bio = req->biotail = NULL;
 
-	nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
 
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	nvme_submit_cmd(nvmeq, cmd);
-	schedule();
+	req->cmd = (unsigned char *)cmd;
+	req->cmd_len = sizeof(struct nvme_command);
+	req->special = (void *)0;
 
+	if (buffer && bufflen) {
+		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+	} else if (ubuffer && bufflen) {
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+		bio = req->bio;
+	}
+
+	blk_execute_rq(req->q, NULL, req, 0);
+	if (bio)
+		blk_rq_unmap_user(bio);
 	if (result)
-		*result = cmdinfo.result;
-	return cmdinfo.status;
+		*result = (u32)(uintptr_t)req->special;
+	ret = req->errors;
+ out:
+	blk_mq_free_request(req);
+	return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen)
+{
+	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
 }
 
 static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1033,7 +1069,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = req->tag;
 
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
+	blk_mq_free_request(req);
 	return __nvme_submit_cmd(nvmeq, &c);
 }
 
@@ -1060,41 +1096,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 	return nvme_submit_cmd(nvmeq, cmd);
 }
 
-static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
-						u32 *result, unsigned timeout)
-{
-	int res;
-	struct request *req;
-
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	res = nvme_submit_sync_cmd(req, cmd, result, timeout);
-	blk_mq_free_request(req);
-	return res;
-}
-
-int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
-								u32 *result)
-{
-	return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT);
-}
-
-int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
-					struct nvme_command *cmd, u32 *result)
-{
-	int res;
-	struct request *req;
-
-	req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT),
-									false);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT);
-	blk_mq_free_request(req);
-	return res;
-}
-
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	struct nvme_command c;
@@ -1103,7 +1104,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1112,6 +1113,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
 
+	/*
+	 * Note: we (ab)use the fact the the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
 	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
@@ -1120,7 +1125,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1129,6 +1134,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
 
+	/*
+	 * Note: we (ab)use the fact the the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
 	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
@@ -1137,7 +1146,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1150,18 +1159,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr)
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 {
-	struct nvme_command c;
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.cns = cpu_to_le32(1),
+	};
+	int error;
 
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cpu_to_le32(nsid);
-	c.identify.prp1 = cpu_to_le64(dma_addr);
-	c.identify.cns = cpu_to_le32(cns);
+	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ctrl));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.nsid = cpu_to_le32(nsid),
+	};
+	int error;
+
+	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ns));
+	if (error)
+		kfree(*id);
+	return error;
 }
 
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1175,7 +1209,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return nvme_submit_admin_cmd(dev, &c, result);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
 }
 
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1189,7 +1224,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return nvme_submit_admin_cmd(dev, &c, result);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+	struct nvme_command c = {
+		.common.opcode = nvme_admin_get_log_page,
+		.common.nsid = cpu_to_le32(0xFFFFFFFF),
+		.common.cdw10[0] = cpu_to_le32(
+			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+			 NVME_LOG_SMART),
+	};
+	int error;
+
+	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (!*log)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+			sizeof(struct nvme_smart_log));
+	if (error)
+		kfree(*log);
+	return error;
 }
 
 /**
@@ -1214,8 +1272,7 @@ static void nvme_abort_req(struct request *req)
 		if (work_busy(&dev->reset_work))
 			goto out;
 		list_del_init(&dev->node);
-		dev_warn(&dev->pci_dev->dev,
-			"I/O %d QID %d timeout, reset controller\n",
+		dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
@@ -1254,8 +1311,7 @@ static void nvme_abort_req(struct request *req)
 	}
 }
 
-static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
-				struct request *req, void *data, bool reserved)
+static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
 {
 	struct nvme_queue *nvmeq = data;
 	void *ctx;
@@ -1352,11 +1408,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 
 static void nvme_clear_queue(struct nvme_queue *nvmeq)
 {
-	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-
 	spin_lock_irq(&nvmeq->q_lock);
-	if (hctx && hctx->tags)
-		blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
+	if (nvmeq->tags && *nvmeq->tags)
+		blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
 }
 
@@ -1384,22 +1438,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth)
 {
-	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
 
-	nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth),
+	nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
 					  &nvmeq->cq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->cqes)
 		goto free_nvmeq;
 
-	nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
 					&nvmeq->sq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->sq_cmds)
 		goto free_cqdma;
 
-	nvmeq->q_dmadev = dmadev;
+	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
 			dev->instance, qid);
@@ -1409,13 +1462,16 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->qid = qid;
-	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
 
+	/* make sure queue descriptor is set before queue count, for kthread */
+	mb();
+	dev->queue_count++;
+
 	return nvmeq;
 
  free_cqdma:
-	dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes,
+	dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
 							nvmeq->cq_dma_addr);
  free_nvmeq:
 	kfree(nvmeq);
@@ -1487,7 +1543,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
 		if (fatal_signal_pending(current))
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Device not ready; aborting %s\n", enabled ?
 						"initialisation" : "reset");
 			return -ENODEV;
@@ -1537,7 +1593,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 		if (fatal_signal_pending(current))
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Device shutdown incomplete; abort shutdown\n");
 			return -ENODEV;
 		}
@@ -1547,10 +1603,9 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 }
 
 static struct blk_mq_ops nvme_mq_admin_ops = {
-	.queue_rq	= nvme_admin_queue_rq,
+	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_admin_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -1559,7 +1614,6 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -1580,7 +1634,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
 		dev->admin_tagset.reserved_tags = 1;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
-		dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
 		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
 		dev->admin_tagset.driver_data = dev;
 
@@ -1613,14 +1667,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
 
 	if (page_shift < dev_page_min) {
-		dev_err(&dev->pci_dev->dev,
+		dev_err(dev->dev,
 				"Minimum device page size (%u) too large for "
 				"host (%u)\n", 1 << dev_page_min,
 				1 << page_shift);
 		return -ENODEV;
 	}
 	if (page_shift > dev_page_max) {
-		dev_info(&dev->pci_dev->dev,
+		dev_info(dev->dev,
 				"Device maximum page size (%u) smaller than "
 				"host (%u); enabling work-around\n",
 				1 << dev_page_max, 1 << page_shift);
@@ -1668,131 +1722,51 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length)
-{
-	int i, err, count, nents, offset;
-	struct scatterlist *sg;
-	struct page **pages;
-	struct nvme_iod *iod;
-
-	if (addr & 3)
-		return ERR_PTR(-EINVAL);
-	if (!length || length > INT_MAX - PAGE_SIZE)
-		return ERR_PTR(-EINVAL);
-
-	offset = offset_in_page(addr);
-	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
-	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	err = get_user_pages_fast(addr, count, 1, pages);
-	if (err < count) {
-		count = err;
-		err = -EFAULT;
-		goto put_pages;
-	}
-
-	err = -ENOMEM;
-	iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
-	if (!iod)
-		goto put_pages;
-
-	sg = iod->sg;
-	sg_init_table(sg, count);
-	for (i = 0; i < count; i++) {
-		sg_set_page(&sg[i], pages[i],
-			    min_t(unsigned, length, PAGE_SIZE - offset),
-			    offset);
-		length -= (PAGE_SIZE - offset);
-		offset = 0;
-	}
-	sg_mark_end(&sg[i - 1]);
-	iod->nents = count;
-
-	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
-				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	if (!nents)
-		goto free_iod;
-
-	kfree(pages);
-	return iod;
-
- free_iod:
-	kfree(iod);
- put_pages:
-	for (i = 0; i < count; i++)
-		put_page(pages[i]);
-	kfree(pages);
-	return ERR_PTR(err);
-}
-
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod)
-{
-	int i;
-
-	dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
-				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-	for (i = 0; i < iod->nents; i++)
-		put_page(sg_page(&iod->sg[i]));
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_user_io io;
 	struct nvme_command c;
-	unsigned length, meta_len, prp_len;
+	unsigned length, meta_len;
 	int status, write;
-	struct nvme_iod *iod;
 	dma_addr_t meta_dma = 0;
 	void *meta = NULL;
+	void __user *metadata;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = (io.nblocks + 1) << ns->lba_shift;
-	meta_len = (io.nblocks + 1) * ns->ms;
-
-	if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
-		return -EINVAL;
-	else if (meta_len && ns->ext) {
-		length += meta_len;
-		meta_len = 0;
-	}
-
-	write = io.opcode & 1;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		iod = nvme_map_user_pages(dev, write, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (IS_ERR(iod))
-		return PTR_ERR(iod);
+	length = (io.nblocks + 1) << ns->lba_shift;
+	meta_len = (io.nblocks + 1) * ns->ms;
+	metadata = (void __user *)(unsigned long)io.metadata;
+	write = io.opcode & 1;
 
-	prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-	if (length != prp_len) {
-		status = -ENOMEM;
-		goto unmap;
+	if (ns->ext) {
+		length += meta_len;
+		meta_len = 0;
 	}
 	if (meta_len) {
-		meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+		if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+			return -EINVAL;
+
+		meta = dma_alloc_coherent(dev->dev, meta_len,
 						&meta_dma, GFP_KERNEL);
+
 		if (!meta) {
 			status = -ENOMEM;
 			goto unmap;
 		}
 		if (write) {
-			if (copy_from_user(meta, (void __user *)io.metadata,
-								meta_len)) {
+			if (copy_from_user(meta, metadata, meta_len)) {
 				status = -EFAULT;
 				goto unmap;
 			}
@@ -1810,20 +1784,17 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 	c.rw.metadata = cpu_to_le64(meta_dma);
-	status = nvme_submit_io_cmd(dev, ns, &c, NULL);
+
+	status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+			(void __user *)io.addr, length, NULL, 0);
  unmap:
-	nvme_unmap_user_pages(dev, write, iod);
-	nvme_free_iod(dev, iod);
 	if (meta) {
 		if (status == NVME_SC_SUCCESS && !write) {
-			if (copy_to_user((void __user *)io.metadata, meta,
-								meta_len))
+			if (copy_to_user(metadata, meta, meta_len))
 				status = -EFAULT;
 		}
-		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
+		dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
 	}
 	return status;
 }
@@ -1833,9 +1804,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd cmd;
 	struct nvme_command c;
-	int status, length;
-	struct nvme_iod *uninitialized_var(iod);
-	unsigned timeout;
+	unsigned timeout = 0;
+	int status;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1855,46 +1825,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
 	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
 
-	length = cmd.data_len;
-	if (cmd.data_len) {
-		iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
-								length);
-		if (IS_ERR(iod))
-			return PTR_ERR(iod);
-		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.common.prp2 = cpu_to_le64(iod->first_dma);
-	}
-
-	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
-								ADMIN_TIMEOUT;
-
-	if (length != cmd.data_len)
-		status = -ENOMEM;
-	else if (ns) {
-		struct request *req;
-
-		req = blk_mq_alloc_request(ns->queue, WRITE,
-						(GFP_KERNEL|__GFP_WAIT), false);
-		if (IS_ERR(req))
-			status = PTR_ERR(req);
-		else {
-			status = nvme_submit_sync_cmd(req, &c, &cmd.result,
-								timeout);
-			blk_mq_free_request(req);
-		}
-	} else
-		status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
-	if (cmd.data_len) {
-		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
-		nvme_free_iod(dev, iod);
+	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+			NULL, (void __user *)cmd.addr, cmd.data_len,
+			&cmd.result, timeout);
+	if (status >= 0) {
+		if (put_user(cmd.result, &ucmd->result))
+			return -EFAULT;
 	}
 
-	if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
-							sizeof(cmd.result)))
-		status = -EFAULT;
-
 	return status;
 }
 
@@ -1986,23 +1927,18 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	struct nvme_ns *ns = disk->private_data;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
-	dma_addr_t dma_addr;
 	u8 lbaf, pi_type;
 	u16 old_ms;
 	unsigned short bs;
 
-	id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
-								GFP_KERNEL);
-	if (!id) {
-		dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n",
-								__func__);
-		return 0;
+	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
+						dev->instance, ns->ns_id);
+		return -ENODEV;
 	}
-	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
-		dev_warn(&dev->pci_dev->dev,
-			"identify failed ns:%d, setting capacity to 0\n",
-			ns->ns_id);
-		memset(id, 0, sizeof(*id));
+	if (id->ncap == 0) {
+		kfree(id);
+		return -ENODEV;
 	}
 
 	old_ms = ns->ms;
@@ -2036,7 +1972,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								!ns->ext)
 		nvme_init_integrity(ns);
 
-	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+	if (ns->ms && !blk_get_integrity(disk))
 		set_capacity(disk, 0);
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2044,7 +1980,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (dev->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 
-	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	kfree(id);
 	return 0;
 }
 
@@ -2071,7 +2007,7 @@ static int nvme_kthread(void *data)
 				if (work_busy(&dev->reset_work))
 					continue;
 				list_del_init(&dev->node);
-				dev_warn(&dev->pci_dev->dev,
+				dev_warn(dev->dev,
 					"Failed status: %x, reset controller\n",
 					readl(&dev->bar->csts));
 				dev->reset_workfn = nvme_reset_failed_dev;
@@ -2103,7 +2039,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 {
 	struct nvme_ns *ns;
 	struct gendisk *disk;
-	int node = dev_to_node(&dev->pci_dev->dev);
+	int node = dev_to_node(dev->dev);
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
@@ -2151,11 +2087,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	 * requires it.
 	 */
 	set_capacity(disk, 0);
-	nvme_revalidate_disk(ns->disk);
+	if (nvme_revalidate_disk(ns->disk))
+		goto out_free_disk;
+
 	add_disk(ns->disk);
 	if (ns->ms)
 		revalidate_disk(ns->disk);
 	return;
+ out_free_disk:
+	kfree(disk);
+	list_del(&ns->list);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -2186,8 +2127,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	if (status < 0)
 		return status;
 	if (status > 0) {
-		dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
-									status);
+		dev_err(dev->dev, "Could not set queue count (%d)\n", status);
 		return 0;
 	}
 	return min(result & 0xffff, result >> 16) + 1;
@@ -2201,7 +2141,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int result, i, vecs, nr_io_queues, size;
 
 	nr_io_queues = num_possible_cpus();
@@ -2273,6 +2213,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return result;
 }
 
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	ns->disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(ns->disk);
+	kfree(ns);
+}
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+	return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		if (ns->ns_id == nsid)
+			return ns;
+		if (ns->ns_id > nsid)
+			break;
+	}
+	return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+	return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+							dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+	if (kill)
+		blk_set_queue_dying(ns->queue);
+	if (ns->disk->flags & GENHD_FL_UP) {
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
+		del_gendisk(ns->disk);
+	}
+	if (kill || !blk_queue_dying(ns->queue)) {
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_cleanup_queue(ns->queue);
+        }
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns, *next;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (ns) {
+			if (revalidate_disk(ns->disk)) {
+				nvme_ns_remove(ns);
+				nvme_free_namespace(ns);
+			}
+		} else
+			nvme_alloc_ns(dev, i);
+	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		if (ns->ns_id > nn) {
+			nvme_ns_remove(ns);
+			nvme_free_namespace(ns);
+		}
+	}
+	list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+	struct nvme_id_ctrl *ctrl;
+
+	if (!dev->tagset.tags)
+		return;
+	if (nvme_identify_ctrl(dev, &ctrl))
+		return;
+	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
+	kfree(ctrl);
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -2281,26 +2314,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
  */
 static int nvme_dev_add(struct nvme_dev *dev)
 {
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
-	unsigned nn, i;
+	unsigned nn;
 	struct nvme_id_ctrl *ctrl;
-	void *mem;
-	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
-	if (!mem)
-		return -ENOMEM;
-
-	res = nvme_identify(dev, 0, 1, dma_addr);
+	res = nvme_identify_ctrl(dev, &ctrl);
 	if (res) {
-		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
-		dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
 		return -EIO;
 	}
 
-	ctrl = mem;
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
@@ -2322,12 +2347,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		} else
 			dev->max_hw_sectors = max_hw_sectors;
 	}
-	dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+	kfree(ctrl);
 
 	dev->tagset.ops = &nvme_mq_ops;
 	dev->tagset.nr_hw_queues = dev->online_queues - 1;
 	dev->tagset.timeout = NVME_IO_TIMEOUT;
-	dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+	dev->tagset.numa_node = dev_to_node(dev->dev);
 	dev->tagset.queue_depth =
 				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
 	dev->tagset.cmd_size = nvme_cmd_size(dev);
@@ -2337,9 +2362,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (blk_mq_alloc_tag_set(&dev->tagset))
 		return 0;
 
-	for (i = 1; i <= nn; i++)
-		nvme_alloc_ns(dev, i);
-
+	schedule_work(&dev->scan_work);
 	return 0;
 }
 
@@ -2347,7 +2370,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 {
 	u64 cap;
 	int bars, result = -ENOMEM;
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_enable_device_mem(pdev))
 		return result;
@@ -2361,8 +2384,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (pci_request_selected_regions(pdev, bars, "nvme"))
 		goto disable_pci;
 
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
-	    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
+	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
 		goto disable;
 
 	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
@@ -2403,19 +2426,21 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (pdev->msi_enabled)
+		pci_disable_msi(pdev);
+	else if (pdev->msix_enabled)
+		pci_disable_msix(pdev);
 
 	if (dev->bar) {
 		iounmap(dev->bar);
 		dev->bar = NULL;
-		pci_release_regions(dev->pci_dev);
+		pci_release_regions(pdev);
 	}
 
-	if (pci_is_enabled(dev->pci_dev))
-		pci_disable_device(dev->pci_dev);
+	if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
 }
 
 struct nvme_delq_ctx {
@@ -2534,7 +2559,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
 					&worker, "nvme%d", dev->instance);
 
 	if (IS_ERR(kworker_task)) {
-		dev_err(&dev->pci_dev->dev,
+		dev_err(dev->dev,
 			"Failed to create queue del task\n");
 		for (i = dev->queue_count - 1; i > 0; i--)
 			nvme_disable_queue(dev, i);
@@ -2585,9 +2610,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev)
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		blk_mq_freeze_queue_start(ns->queue);
 
-		spin_lock(ns->queue->queue_lock);
+		spin_lock_irq(ns->queue->queue_lock);
 		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
-		spin_unlock(ns->queue->queue_lock);
+		spin_unlock_irq(ns->queue->queue_lock);
 
 		blk_mq_cancel_requeue_work(ns->queue);
 		blk_mq_stop_hw_queues(ns->queue);
@@ -2637,29 +2662,19 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
 
-	list_for_each_entry(ns, &dev->namespaces, list) {
-		if (ns->disk->flags & GENHD_FL_UP) {
-			if (blk_get_integrity(ns->disk))
-				blk_integrity_unregister(ns->disk);
-			del_gendisk(ns->disk);
-		}
-		if (!blk_queue_dying(ns->queue)) {
-			blk_mq_abort_requeue_list(ns->queue);
-			blk_cleanup_queue(ns->queue);
-		}
-	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		nvme_ns_remove(ns);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
-	struct device *dmadev = &dev->pci_dev->dev;
-	dev->prp_page_pool = dma_pool_create("prp list page", dmadev,
+	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
 						PAGE_SIZE, PAGE_SIZE, 0);
 	if (!dev->prp_page_pool)
 		return -ENOMEM;
 
 	/* Optimisation for I/Os between 4k and 128k */
-	dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
+	dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
 						256, 256, 0);
 	if (!dev->prp_small_pool) {
 		dma_pool_destroy(dev->prp_page_pool);
@@ -2707,23 +2722,15 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
 
-	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-		list_del(&ns->list);
-
-		spin_lock(&dev_list_lock);
-		ns->disk->private_data = NULL;
-		spin_unlock(&dev_list_lock);
-
-		put_disk(ns->disk);
-		kfree(ns);
-	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+		nvme_free_namespace(ns);
 }
 
 static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 
-	pci_dev_put(dev->pci_dev);
+	put_device(dev->dev);
 	put_device(dev->device);
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
@@ -2779,6 +2786,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -ENOTTY;
 		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
 		return nvme_user_cmd(dev, ns, (void __user *)arg);
+	case NVME_IOCTL_RESET:
+		dev_warn(dev->dev, "resetting controller\n");
+		return nvme_reset(dev);
 	default:
 		return -ENOTTY;
 	}
@@ -2800,11 +2810,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev)
 	for (i = 0; i < dev->online_queues; i++) {
 		nvmeq = dev->queues[i];
 
-		if (!nvmeq->hctx)
+		if (!nvmeq->tags || !(*nvmeq->tags))
 			continue;
 
 		irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
-							nvmeq->hctx->cpumask);
+					blk_mq_tags_cpumask(*nvmeq->tags));
 	}
 }
 
@@ -2867,7 +2877,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
 static int nvme_remove_dead_ctrl(void *arg)
 {
 	struct nvme_dev *dev = (struct nvme_dev *)arg;
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
@@ -2897,6 +2907,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		spin_unlock(&dev_list_lock);
 	} else {
 		nvme_unfreeze_queues(dev);
+		schedule_work(&dev->scan_work);
 		nvme_set_irq_hints(dev);
 	}
 	return 0;
@@ -2906,11 +2917,11 @@ static void nvme_dev_reset(struct nvme_dev *dev)
 {
 	nvme_dev_shutdown(dev);
 	if (nvme_dev_resume(dev)) {
-		dev_warn(&dev->pci_dev->dev, "Device failed to resume\n");
+		dev_warn(dev->dev, "Device failed to resume\n");
 		kref_get(&dev->kref);
 		if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
 							dev->instance))) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Failed to start controller remove task\n");
 			kref_put(&dev->kref, nvme_free_dev);
 		}
@@ -2929,6 +2940,44 @@ static void nvme_reset_workfn(struct work_struct *work)
 	dev->reset_workfn(work);
 }
 
+static int nvme_reset(struct nvme_dev *dev)
+{
+	int ret = -EBUSY;
+
+	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+		return -ENODEV;
+
+	spin_lock(&dev_list_lock);
+	if (!work_pending(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+		ret = 0;
+	}
+	spin_unlock(&dev_list_lock);
+
+	if (!ret) {
+		flush_work(&dev->reset_work);
+		return 0;
+	}
+
+	return ret;
+}
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = nvme_reset(ndev);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
 static void nvme_async_probe(struct work_struct *work);
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -2954,7 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
-	dev->pci_dev = pci_dev_get(pdev);
+	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
 	if (result)
@@ -2973,18 +3022,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto release_pools;
 	}
 	get_device(dev->device);
+	dev_set_drvdata(dev->device, dev);
+
+	result = device_create_file(dev->device, &dev_attr_reset_controller);
+	if (result)
+		goto put_dev;
 
 	INIT_LIST_HEAD(&dev->node);
+	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
 
+ put_dev:
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	put_device(dev->device);
  release_pools:
 	nvme_release_prp_pools(dev);
  release:
 	nvme_release_instance(dev);
  put_pci:
-	pci_dev_put(dev->pci_dev);
+	put_device(dev->dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -3009,10 +3067,12 @@ static void nvme_async_probe(struct work_struct *work)
 	nvme_set_irq_hints(dev);
 	return;
  reset:
+	spin_lock(&dev_list_lock);
 	if (!work_busy(&dev->reset_work)) {
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
 	}
+	spin_unlock(&dev_list_lock);
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -3042,6 +3102,8 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->scan_work);
+	device_remove_file(dev->device, &dev_attr_reset_controller);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 6b736b00f63e..e5a63f06fb0f 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -41,15 +41,13 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <asm/unaligned.h>
 #include <scsi/sg.h>
 #include <scsi/scsi.h>
 
 
 static int sg_version_num = 30534;	/* 2 digits for each component */
 
-#define SNTI_TRANSLATION_SUCCESS			0
-#define SNTI_INTERNAL_ERROR				1
-
 /* VPD Page Codes */
 #define VPD_SUPPORTED_PAGES				0x00
 #define VPD_SERIAL_NUMBER				0x80
@@ -58,49 +56,14 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define VPD_BLOCK_LIMITS				0xB0
 #define VPD_BLOCK_DEV_CHARACTERISTICS			0xB1
 
-/* CDB offsets */
-#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET		6
-#define REPORT_LUNS_SR_OFFSET				2
-#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET		10
-#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET		4
-#define REQUEST_SENSE_DESC_OFFSET			1
-#define REQUEST_SENSE_DESC_MASK				0x01
-#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE		1
-#define INQUIRY_EVPD_BYTE_OFFSET			1
-#define INQUIRY_PAGE_CODE_BYTE_OFFSET			2
-#define INQUIRY_EVPD_BIT_MASK				1
-#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET		3
-#define START_STOP_UNIT_CDB_IMMED_OFFSET		1
-#define START_STOP_UNIT_CDB_IMMED_MASK			0x1
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET	3
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK		0xF
-#define START_STOP_UNIT_CDB_POWER_COND_OFFSET		4
-#define START_STOP_UNIT_CDB_POWER_COND_MASK		0xF0
-#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET		4
-#define START_STOP_UNIT_CDB_NO_FLUSH_MASK		0x4
-#define START_STOP_UNIT_CDB_START_OFFSET		4
-#define START_STOP_UNIT_CDB_START_MASK			0x1
-#define WRITE_BUFFER_CDB_MODE_OFFSET			1
-#define WRITE_BUFFER_CDB_MODE_MASK			0x1F
-#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET		2
-#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET		3
-#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET	6
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET		1
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK		0xC0
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT		6
-#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET		1
-#define FORMAT_UNIT_CDB_LONG_LIST_MASK			0x20
-#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET		1
-#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK		0x10
+/* format unit paramter list offsets */
 #define FORMAT_UNIT_SHORT_PARM_LIST_LEN			4
 #define FORMAT_UNIT_LONG_PARM_LIST_LEN			8
 #define FORMAT_UNIT_PROT_INT_OFFSET			3
 #define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET		0
 #define FORMAT_UNIT_PROT_FIELD_USAGE_MASK		0x07
-#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET		7
 
 /* Misc. defines */
-#define NIBBLE_SHIFT					4
 #define FIXED_SENSE_DATA				0x70
 #define DESC_FORMAT_SENSE_DATA				0x72
 #define FIXED_SENSE_DATA_ADD_LENGTH			10
@@ -144,27 +107,6 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define EXTENDED_INQUIRY_DATA_PAGE_LENGTH		0x3C
 #define RESERVED_FIELD					0
 
-/* SCSI READ/WRITE Defines */
-#define IO_CDB_WP_MASK					0xE0
-#define IO_CDB_WP_SHIFT					5
-#define IO_CDB_FUA_MASK					0x8
-#define IO_6_CDB_LBA_OFFSET				0
-#define IO_6_CDB_LBA_MASK				0x001FFFFF
-#define IO_6_CDB_TX_LEN_OFFSET				4
-#define IO_6_DEFAULT_TX_LEN				256
-#define IO_10_CDB_LBA_OFFSET				2
-#define IO_10_CDB_TX_LEN_OFFSET				7
-#define IO_10_CDB_WP_OFFSET				1
-#define IO_10_CDB_FUA_OFFSET				1
-#define IO_12_CDB_LBA_OFFSET				2
-#define IO_12_CDB_TX_LEN_OFFSET				6
-#define IO_12_CDB_WP_OFFSET				1
-#define IO_12_CDB_FUA_OFFSET				1
-#define IO_16_CDB_FUA_OFFSET				1
-#define IO_16_CDB_WP_OFFSET				1
-#define IO_16_CDB_LBA_OFFSET				2
-#define IO_16_CDB_TX_LEN_OFFSET				10
-
 /* Mode Sense/Select defines */
 #define MODE_PAGE_INFO_EXCEP				0x1C
 #define MODE_PAGE_CACHING				0x08
@@ -179,23 +121,14 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define MODE_PAGE_INF_EXC_LEN				0x0C
 #define MODE_PAGE_ALL_LEN				0x54
 #define MODE_SENSE6_MPH_SIZE				4
-#define MODE_SENSE6_ALLOC_LEN_OFFSET			4
-#define MODE_SENSE_PAGE_CONTROL_OFFSET			2
 #define MODE_SENSE_PAGE_CONTROL_MASK			0xC0
 #define MODE_SENSE_PAGE_CODE_OFFSET			2
 #define MODE_SENSE_PAGE_CODE_MASK			0x3F
-#define MODE_SENSE_LLBAA_OFFSET				1
 #define MODE_SENSE_LLBAA_MASK				0x10
 #define MODE_SENSE_LLBAA_SHIFT				4
-#define MODE_SENSE_DBD_OFFSET				1
 #define MODE_SENSE_DBD_MASK				8
 #define MODE_SENSE_DBD_SHIFT				3
 #define MODE_SENSE10_MPH_SIZE				8
-#define MODE_SENSE10_ALLOC_LEN_OFFSET			7
-#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET		1
-#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET		1
-#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET	4
-#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET	7
 #define MODE_SELECT_CDB_PAGE_FORMAT_MASK		0x10
 #define MODE_SELECT_CDB_SAVE_PAGES_MASK			0x1
 #define MODE_SELECT_6_BD_OFFSET				3
@@ -221,14 +154,11 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH		0x07
 #define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE		0x2F
 #define LOG_PAGE_TEMPERATURE_PAGE			0x0D
-#define LOG_SENSE_CDB_SP_OFFSET				1
 #define LOG_SENSE_CDB_SP_NOT_ENABLED			0
-#define LOG_SENSE_CDB_PC_OFFSET				2
 #define LOG_SENSE_CDB_PC_MASK				0xC0
 #define LOG_SENSE_CDB_PC_SHIFT				6
 #define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES		1
 #define LOG_SENSE_CDB_PAGE_CODE_MASK			0x3F
-#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET		7
 #define REMAINING_INFO_EXCP_PAGE_LENGTH			0x8
 #define LOG_INFO_EXCP_PAGE_LENGTH			0xC
 #define REMAINING_TEMP_PAGE_LENGTH			0xC
@@ -278,77 +208,11 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define SCSI_ASCQ_POWER_LOSS_EXPECTED			0x08
 #define SCSI_ASCQ_INVALID_LUN_ID			0x09
 
-/**
- * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to
- * enable DPOFUA support type 0x10 value.
- */
-#define DEVICE_SPECIFIC_PARAMETER			0
-#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR)
-
-/* MACROs to extract information from CDBs */
-
-#define GET_OPCODE(cdb)		cdb[0]
-
-#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0)
-
-#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0))
-
-#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \
-(cdb[index + 1] <<  8) | \
-(cdb[index + 2] <<  0))
-
-#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \
-(cdb[index + 1] << 16) | \
-(cdb[index + 2] <<  8) | \
-(cdb[index + 3] <<  0))
-
-#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \
-(((u64)cdb[index + 1]) << 48) | \
-(((u64)cdb[index + 2]) << 40) | \
-(((u64)cdb[index + 3]) << 32) | \
-(((u64)cdb[index + 4]) << 24) | \
-(((u64)cdb[index + 5]) << 16) | \
-(((u64)cdb[index + 6]) <<  8) | \
-(((u64)cdb[index + 7]) <<  0))
-
-/* Inquiry Helper Macros */
-#define GET_INQ_EVPD_BIT(cdb) \
-((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) &		\
-INQUIRY_EVPD_BIT_MASK) ? 1 : 0)
-
-#define GET_INQ_PAGE_CODE(cdb)					\
-(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET))
-
-#define GET_INQ_ALLOC_LENGTH(cdb)				\
-(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET))
-
-/* Report LUNs Helper Macros */
-#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb)			\
-(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Read Capacity Helper Macros */
-#define GET_READ_CAP_16_ALLOC_LENGTH(cdb)			\
-(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET))
-
-#define IS_READ_CAP_16(cdb)					\
-((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
-
-/* Request Sense Helper Macros */
-#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb)			\
-(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Mode Sense Helper Macros */
-#define GET_MODE_SENSE_DBD(cdb)					\
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >>	\
-MODE_SENSE_DBD_SHIFT)
-
-#define GET_MODE_SENSE_LLBAA(cdb)				\
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) &		\
-MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT)
-
-#define GET_MODE_SENSE_MPH_SIZE(cdb10)				\
-(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE)
-
+/* copied from drivers/usb/gadget/function/storage_common.h */
+static inline u32 get_unaligned_be24(u8 *buf)
+{
+	return 0xffffff & (u32) get_unaligned_be32(buf - 1);
+}
 
 /* Struct to gather data that needs to be extracted from a SCSI CDB.
    Not conforming to any particular CDB variant, but compatible with all. */
@@ -369,8 +233,6 @@ struct nvme_trans_io_cdb {
 static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 								unsigned long n)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	unsigned long not_copied;
 	int i;
 	void *index = from;
 	size_t remaining = n;
@@ -380,29 +242,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 		struct sg_iovec sgl;
 
 		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
+			if (copy_from_user(&sgl, hdr->dxferp +
 						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
+						sizeof(struct sg_iovec)))
 				return -EFAULT;
 			xfer_len = min(remaining, sgl.iov_len);
-			not_copied = copy_to_user(sgl.iov_base, index,
-								xfer_len);
-			if (not_copied) {
-				res = -EFAULT;
-				break;
-			}
+			if (copy_to_user(sgl.iov_base, index, xfer_len))
+				return -EFAULT;
+
 			index += xfer_len;
 			remaining -= xfer_len;
 			if (remaining == 0)
 				break;
 		}
-		return res;
+		return 0;
 	}
-	not_copied = copy_to_user(hdr->dxferp, from, n);
-	if (not_copied)
-		res = -EFAULT;
-	return res;
+
+	if (copy_to_user(hdr->dxferp, from, n))
+		return -EFAULT;
+	return 0;
 }
 
 /* Copy data from userspace memory */
@@ -410,8 +268,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 								unsigned long n)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	unsigned long not_copied;
 	int i;
 	void *index = to;
 	size_t remaining = n;
@@ -421,30 +277,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 		struct sg_iovec sgl;
 
 		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
+			if (copy_from_user(&sgl, hdr->dxferp +
 						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
+						sizeof(struct sg_iovec)))
 				return -EFAULT;
 			xfer_len = min(remaining, sgl.iov_len);
-			not_copied = copy_from_user(index, sgl.iov_base,
-								xfer_len);
-			if (not_copied) {
-				res = -EFAULT;
-				break;
-			}
+			if (copy_from_user(index, sgl.iov_base, xfer_len))
+				return -EFAULT;
 			index += xfer_len;
 			remaining -= xfer_len;
 			if (remaining == 0)
 				break;
 		}
-		return res;
+		return 0;
 	}
 
-	not_copied = copy_from_user(to, hdr->dxferp, n);
-	if (not_copied)
-		res = -EFAULT;
-	return res;
+	if (copy_from_user(to, hdr->dxferp, n))
+		return -EFAULT;
+	return 0;
 }
 
 /* Status/Sense Buffer Writeback */
@@ -452,7 +302,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
 				 u8 asc, u8 ascq)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	u8 xfer_len;
 	u8 resp[DESC_FMT_SENSE_DATA_SIZE];
 
@@ -477,25 +326,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
 		xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
 		hdr->sb_len_wr = xfer_len;
 		if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
-			res = -EFAULT;
+			return -EFAULT;
 	}
 
-	return res;
+	return 0;
 }
 
+/*
+ * Take a status code from a lowlevel routine, and if it was a positive NVMe
+ * error code update the sense data based on it.  In either case the passed
+ * in value is returned again, unless an -EFAULT from copy_to_user overrides
+ * it.
+ */
 static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
 {
 	u8 status, sense_key, asc, ascq;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 
 	/* For non-nvme (Linux) errors, simply return the error code */
 	if (nvme_sc < 0)
 		return nvme_sc;
 
 	/* Mask DNR, More, and reserved fields */
-	nvme_sc &= 0x7FF;
-
-	switch (nvme_sc) {
+	switch (nvme_sc & 0x7FF) {
 	/* Generic Command Status */
 	case NVME_SC_SUCCESS:
 		status = SAM_STAT_GOOD;
@@ -662,8 +515,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
 	}
 
 	res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-
-	return res;
+	return res ? res : nvme_sc;
 }
 
 /* INQUIRY Helper Functions */
@@ -673,10 +525,8 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 					int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	int xfer_len;
 	u8 resp_data_format = 0x02;
@@ -684,31 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	u8 cmdque = 0x01 << 1;
 	u8 fw_offset = sizeof(dev->firmware_rev);
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-				&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
-
 	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	/*
-	 * If nvme_sc was -ve, res will be -ve here.
-	 * If nvme_sc was +ve, the status would bace been translated, and res
-	 *  can only be 0 or -ve.
-	 *    - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc
-	 *    - If -ve, return because its a Linux error.
-	 */
 	if (res)
-		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
-	id_ns = mem;
-	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
+		return res;
+
+	if (id_ns->dps)
+		protect = 0x01;
+	else
+		protect = 0;
+	kfree(id_ns);
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
 	inq_response[2] = VERSION_SPC_4;
@@ -725,20 +561,13 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out_dma:
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -752,9 +581,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
 	inq_response[9] = INQ_BDEV_LIMITS_PAGE;
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
@@ -762,7 +589,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 					int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -771,53 +597,42 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 	strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *inq_response, int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	int xfer_len;
 	__be32 tmp_id = cpu_to_be32(ns->ns_id);
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
-
 	memset(inq_response, 0, alloc_len);
 	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
 	if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
-		struct nvme_id_ns *id_ns = mem;
-		void *eui = id_ns->eui64;
-		int len = sizeof(id_ns->eui64);
+		struct nvme_id_ns *id_ns;
+		void *eui;
+		int len;
 
-		nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_free;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_free;
-		}
+			return res;
 
+		eui = id_ns->eui64;
+		len = sizeof(id_ns->eui64);
 		if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
 			if (bitmap_empty(eui, len * 8)) {
 				eui = id_ns->nguid;
 				len = sizeof(id_ns->nguid);
 			}
 		}
-		if (bitmap_empty(eui, len * 8))
+		if (bitmap_empty(eui, len * 8)) {
+			kfree(id_ns);
 			goto scsi_string;
+		}
 
 		inq_response[3] = 4 + len; /* Page Length */
 		/* Designation Descriptor start */
@@ -826,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		inq_response[6] = 0x00;    /* Rsvd */
 		inq_response[7] = len;     /* Designator Length */
 		memcpy(&inq_response[8], eui, len);
+		kfree(id_ns);
 	} else {
  scsi_string:
 		if (alloc_len < 72) {
-			res = nvme_trans_completion(hdr,
+			return nvme_trans_completion(hdr,
 					SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_free;
 		}
 		inq_response[3] = 0x48;    /* Page Length */
 		/* Designation Descriptor start */
@@ -842,30 +657,22 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		inq_response[6] = 0x00;    /* Rsvd */
 		inq_response[7] = 0x44;    /* Designator Length */
 
-		sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+		sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
 		memcpy(&inq_response[12], dev->model, sizeof(dev->model));
 		sprintf(&inq_response[52], "%04x", tmp_id);
 		memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
 	}
 	xfer_len = alloc_len;
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out_dma:
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
 	u8 *inq_response;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	struct nvme_id_ns *id_ns;
 	int xfer_len;
@@ -878,45 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 luiclr = 0x01;
 
 	inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	if (inq_response == NULL)
+		return -ENOMEM;
 
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
-	id_ns = mem;
-	spt = spt_lut[(id_ns->dpc) & 0x07] << 3;
-	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
+		goto out_free_inq;
+
+	spt = spt_lut[id_ns->dpc & 0x07] << 3;
+	if (id_ns->dps)
+		protect = 0x01;
+	else
+		protect = 0;
+	kfree(id_ns);
+
 	grd_chk = protect << 2;
 	app_chk = protect << 1;
 	ref_chk = protect;
 
-	/* nvme controller identify */
-	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
-	id_ctrl = mem;
+		goto out_free_inq;
+
 	v_sup = id_ctrl->vwc;
+	kfree(id_ctrl);
 
 	memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
 	inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE;    /* Page Code */
@@ -932,19 +726,16 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
- out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out_dma:
+ out_free_inq:
 	kfree(inq_response);
- out_mem:
 	return res;
 }
 
 static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *inq_response, int alloc_len)
 {
-	__be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
+	__be32 max_sectors = cpu_to_be32(
+		nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
 	__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
 	__be32 discard_desc_count = cpu_to_be32(0x100);
 
@@ -964,7 +755,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
 	u8 *inq_response;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 
 	inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
@@ -993,7 +784,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
 
@@ -1021,47 +812,30 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_command c;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
-	dma_addr_t dma_addr;
-	void *mem;
 	u8 temp_c;
 	u16 temp_k;
 
 	log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
+	if (log_response == NULL)
+		return -ENOMEM;
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_smart_log),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	res = nvme_get_log_page(dev, &smart_log);
+	if (res < 0)
+		goto out_free_response;
 
-	/* Get SMART Log Page */
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_get_log_page;
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
-	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
 	} else {
-		smart_log = mem;
 		temp_k = (smart_log->temperature[1] << 8) +
 				(smart_log->temperature[0]);
 		temp_c = temp_k - KELVIN_TEMP_FACTOR;
 	}
+	kfree(smart_log);
 
 	log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
 	/* Subpage=0x00, Page Length MSB=0 */
@@ -1077,59 +851,39 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
-			  mem, dma_addr);
- out_dma:
+ out_free_response:
 	kfree(log_response);
- out_mem:
 	return res;
 }
 
 static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_command c;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
-	dma_addr_t dma_addr;
-	void *mem;
 	u32 feature_resp;
 	u8 temp_c_cur, temp_c_thresh;
 	u16 temp_k;
 
 	log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
+	if (log_response == NULL)
+		return -ENOMEM;
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_smart_log),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	res = nvme_get_log_page(dev, &smart_log);
+	if (res < 0)
+		goto out_free_response;
 
-	/* Get SMART Log Page */
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_get_log_page;
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
-	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_admin_cmd(dev, &c, NULL);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
 	} else {
-		smart_log = mem;
 		temp_k = (smart_log->temperature[1] << 8) +
 				(smart_log->temperature[0]);
 		temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
 	}
+	kfree(smart_log);
 
 	/* Get Features for Temp Threshold */
 	res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
@@ -1158,11 +912,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
-			  mem, dma_addr);
- out_dma:
+ out_free_response:
 	kfree(log_response);
- out_mem:
 	return res;
 }
 
@@ -1173,59 +924,45 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
 {
 	/* Quick check to make sure I don't stomp on my own memory... */
 	if ((cdb10 && len < 8) || (!cdb10 && len < 4))
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	if (cdb10) {
 		resp[0] = (mode_data_length & 0xFF00) >> 8;
 		resp[1] = (mode_data_length & 0x00FF);
-		/* resp[2] and [3] are zero */
+		resp[3] = 0x10 /* DPOFUA */;
 		resp[4] = llbaa;
 		resp[5] = RESERVED_FIELD;
 		resp[6] = (blk_desc_len & 0xFF00) >> 8;
 		resp[7] = (blk_desc_len & 0x00FF);
 	} else {
 		resp[0] = (mode_data_length & 0x00FF);
-		/* resp[1] and [2] are zero */
+		resp[2] = 0x10 /* DPOFUA */;
 		resp[3] = (blk_desc_len & 0x00FF);
 	}
 
-	return SNTI_TRANSLATION_SUCCESS;
+	return 0;
 }
 
 static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				    u8 *resp, int len, u8 llbaa)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 flbas;
 	u32 lba_length;
 
 	if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
-		return SNTI_INTERNAL_ERROR;
-
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
+		return -EINVAL;
 
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
-	id_ns = mem;
+		return res;
+
 	flbas = (id_ns->flbas) & 0x0F;
 	lba_length = (1 << (id_ns->lbaf[flbas].ds));
 
@@ -1245,10 +982,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		memcpy(&resp[12], &tmp_len, sizeof(u32));
 	}
 
- out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out:
+	kfree(id_ns);
 	return res;
 }
 
@@ -1257,7 +991,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
 					int len)
 {
 	if (len < MODE_PAGE_CONTROL_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_CONTROL;
 	resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
@@ -1271,78 +1005,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
 	resp[9] = 0xFF;
 	/* Bytes 10,11: Extended selftest completion time = 0x0000 */
 
-	return SNTI_TRANSLATION_SUCCESS;
+	return 0;
 }
 
 static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr,
 					u8 *resp, int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	u32 feature_resp;
 	u8 vwc;
 
 	if (len < MODE_PAGE_CACHING_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
 								&feature_resp);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out;
-	}
+		return res;
+
 	vwc = feature_resp & 0x00000001;
 
 	resp[0] = MODE_PAGE_CACHING;
 	resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
 	resp[2] = vwc << 2;
-
- out:
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *resp,
 					int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-
 	if (len < MODE_PAGE_POW_CND_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_POWER_CONDITION;
 	resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
 	/* All other bytes are zero */
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *resp,
 					int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-
 	if (len < MODE_PAGE_INF_EXC_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_INFO_EXCEP;
 	resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
 	resp[2] = 0x88;
 	/* All other bytes are zero */
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				     u8 *resp, int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u16 mode_pages_offset_1 = 0;
 	u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
 
@@ -1352,23 +1077,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
 					MODE_PAGE_CACHING_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
+	if (res)
+		return res;
 	res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
 					MODE_PAGE_CONTROL_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
+	if (res)
+		return res;
 	res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
 					MODE_PAGE_POW_CND_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
-	res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
+	if (res)
+		return res;
+	return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
 					MODE_PAGE_INF_EXC_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
-
- out:
-	return res;
 }
 
 static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
@@ -1389,7 +1109,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *, int),
 					u16 mode_pages_tot_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *response;
 	u8 dbd, llbaa;
@@ -1398,9 +1118,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 	u16 mode_pages_offset_1;
 	u16 blk_desc_len, blk_desc_offset, mode_data_length;
 
-	dbd = GET_MODE_SENSE_DBD(cmd);
-	llbaa = GET_MODE_SENSE_LLBAA(cmd);
-	mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10);
+	dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
+	llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
+	mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
+
 	blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
 
 	resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
@@ -1418,18 +1139,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 
 	res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
 					llbaa, mode_data_length, blk_desc_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_free;
 	if (blk_desc_len > 0) {
 		res = nvme_trans_fill_blk_desc(ns, hdr,
 					       &response[blk_desc_offset],
 					       blk_desc_len, llbaa);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out_free;
 	}
 	res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
 					mode_pages_tot_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_free;
 
 	xfer_len = min(alloc_len, resp_size);
@@ -1484,33 +1205,20 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
 static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 						u8 pc, u8 pcmod, u8 start)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	int lowest_pow_st;	/* max npss = lowest power consumption */
 	unsigned ps_desired = 0;
 
-	/* NVMe Controller Identify */
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-				sizeof(struct nvme_id_ctrl),
-				&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
-	id_ctrl = mem;
+		return res;
+
 	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
+	kfree(id_ctrl);
 
 	switch (pc) {
 	case NVME_POWER_STATE_START_VALID:
@@ -1550,79 +1258,48 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 	nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
 				    NULL);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_dma;
-	if (nvme_sc)
-		res = nvme_sc;
- out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
-			  dma_addr);
- out:
-	return res;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
-/* Write Buffer Helper Functions */
-/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */
+static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 buffer_id)
+{
+	struct nvme_command c;
+	int nvme_sc;
 
-static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_activate_fw;
+	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
+
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+	return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 opcode, u32 tot_len, u32 offset,
 					u8 buffer_id)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_command c;
-	struct nvme_iod *iod = NULL;
-	unsigned length;
 
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = opcode;
-	if (opcode == nvme_admin_download_fw) {
-		if (hdr->iovec_count > 0) {
-			/* Assuming SGL is not allowed for this command */
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out;
-		}
-		iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
-				(unsigned long)hdr->dxferp, tot_len);
-		if (IS_ERR(iod)) {
-			res = PTR_ERR(iod);
-			goto out;
-		}
-		length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
-		if (length != tot_len) {
-			res = -ENOMEM;
-			goto out_unmap;
-		}
-
-		c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
-		c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
-		c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-	} else if (opcode == nvme_admin_activate_fw) {
-		u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV;
-		c.common.cdw10[0] = cpu_to_le32(cdw10);
+	if (hdr->iovec_count > 0) {
+		/* Assuming SGL is not allowed for this command */
+		return nvme_trans_completion(hdr,
+					SAM_STAT_CHECK_CONDITION,
+					ILLEGAL_REQUEST,
+					SCSI_ASC_INVALID_CDB,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	}
 
-	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_unmap;
-	if (nvme_sc)
-		res = nvme_sc;
-
- out_unmap:
-	if (opcode == nvme_admin_download_fw) {
-		nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
-		nvme_free_iod(dev, iod);
-	}
- out:
-	return res;
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_download_fw;
+	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
+	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
+
+	nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+			hdr->dxferp, tot_len, NULL, 0);
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 /* Mode Select Helper Functions */
@@ -1685,7 +1362,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
 static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *mode_page, u8 page_code)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	unsigned dword11;
@@ -1696,12 +1373,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
 					    0, NULL);
 		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			break;
-		if (nvme_sc) {
-			res = nvme_sc;
-			break;
-		}
 		break;
 	case MODE_PAGE_CONTROL:
 		break;
@@ -1713,8 +1384,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 						ILLEGAL_REQUEST,
 						SCSI_ASC_INVALID_PARAMETER,
 						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			if (!res)
-				res = SNTI_INTERNAL_ERROR;
 			break;
 		}
 		break;
@@ -1722,8 +1391,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		if (!res)
-			res = SNTI_INTERNAL_ERROR;
 		break;
 	}
 
@@ -1734,7 +1401,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *cmd, u16 parm_list_len, u8 pf,
 					u8 sp, u8 cdb10)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 *parm_list;
 	u16 bd_len;
 	u8 llbaa = 0;
@@ -1750,7 +1417,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 
 	res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_mem;
 
 	nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
@@ -1788,7 +1455,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		mp_size = parm_list[index + 1] + 2;
 		res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
 								page_code);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			break;
 		index += mp_size;
 	} while (index < parm_list_len);
@@ -1804,12 +1471,9 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 					     struct sg_io_hdr *hdr)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
-	struct nvme_id_ns *id_ns;
 	u8 flbas;
 
 	/*
@@ -1820,22 +1484,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 	 */
 
 	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-		mem = dma_alloc_coherent(&dev->pci_dev->dev,
-			sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL);
-		if (mem == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-		/* nvme ns identify */
-		nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+		struct nvme_id_ns *id_ns;
+
+		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_dma;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_dma;
-		}
-		id_ns = mem;
+			return res;
 
 		if (ns->mode_select_num_blocks == 0)
 			ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
@@ -1844,18 +1498,17 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 			ns->mode_select_block_len =
 						(1 << (id_ns->lbaf[flbas].ds));
 		}
- out_dma:
-		dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-				  mem, dma_addr);
+
+		kfree(id_ns);
 	}
- out:
-	return res;
+
+	return 0;
 }
 
 static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 					u8 format_prot_info, u8 *nvme_pf_code)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 *parm_list;
 	u8 pf_usage, pf_code;
 
@@ -1865,7 +1518,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 		goto out;
 	}
 	res = nvme_trans_copy_from_user(hdr, parm_list, len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_mem;
 
 	if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
@@ -1915,11 +1568,9 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				   u8 prot_info)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 i;
 	u8 flbas, nlbaf;
@@ -1928,22 +1579,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 
 	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
-	id_ns = mem;
+		return res;
+
 	flbas = (id_ns->flbas) & 0x0F;
 	nlbaf = id_ns->nlbaf;
 
@@ -1971,69 +1611,13 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.format.nsid = cpu_to_le32(ns->ns_id);
 	c.format.cdw10 = cpu_to_le32(cdw10);
 
-	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_dma;
-	if (nvme_sc)
-		res = nvme_sc;
 
- out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out:
+	kfree(id_ns);
 	return res;
 }
 
-/* Read/Write Helper Functions */
-
-static inline void nvme_trans_get_io_cdb6(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = 0;
-	cdb_info->prot_info = 0;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
-					IO_6_CDB_LBA_MASK;
-	cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
-
-	/* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
-	if (cdb_info->xfer_len == 0)
-		cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
-}
-
-static inline void nvme_trans_get_io_cdb10(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb12(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb16(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
-}
-
 static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
 					struct nvme_trans_io_cdb *cdb_info,
 					u32 max_blocks)
@@ -2063,11 +1647,8 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
 static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	int nvme_sc;
-	struct nvme_dev *dev = ns->dev;
+	int nvme_sc = NVME_SC_SUCCESS;
 	u32 num_cmds;
-	struct nvme_iod *iod;
 	u64 unit_len;
 	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
 	u32 retcode;
@@ -2118,45 +1699,20 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		control = nvme_trans_io_get_control(ns, cdb_info);
 		c.rw.control = cpu_to_le16(control);
 
-		iod = nvme_map_user_pages(dev,
-			(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-			(unsigned long)next_mapping_addr, unit_len);
-		if (IS_ERR(iod)) {
-			res = PTR_ERR(iod);
-			goto out;
-		}
-		retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
-		if (retcode != unit_len) {
-			nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-			nvme_free_iod(dev, iod);
-			res = -ENOMEM;
-			goto out;
+		if (get_capacity(ns->disk) - unit_num_blocks <
+				cdb_info->lba + nvme_offset) {
+			nvme_sc = NVME_SC_LBA_RANGE;
+			break;
 		}
-		c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.rw.prp2 = cpu_to_le64(iod->first_dma);
+		nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+				next_mapping_addr, unit_len, NULL, 0);
+		if (nvme_sc)
+			break;
 
 		nvme_offset += unit_num_blocks;
-
-		nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
-		if (nvme_sc != NVME_SC_SUCCESS) {
-			nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-			nvme_free_iod(dev, iod);
-			res = nvme_trans_status_code(hdr, nvme_sc);
-			goto out;
-		}
-		nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-		nvme_free_iod(dev, iod);
 	}
-	res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
 
- out:
-	return res;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 
@@ -2165,8 +1721,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	struct nvme_trans_io_cdb cdb_info;
+	int res = 0;
+	struct nvme_trans_io_cdb cdb_info = { 0, };
 	u8 opcode = cmd[0];
 	u64 xfer_bytes;
 	u64 sum_iov_len = 0;
@@ -2174,27 +1730,52 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 	int i;
 	size_t not_copied;
 
-	/* Extract Fields from CDB */
+	/*
+	 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
+	 * but always in the same place for all others.
+	 */
+	switch (opcode) {
+	case WRITE_6:
+	case READ_6:
+		break;
+	default:
+		cdb_info.fua = cmd[1] & 0x8;
+		cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
+		if (cdb_info.prot_info && !ns->pi_type) {
+			return nvme_trans_completion(hdr,
+					SAM_STAT_CHECK_CONDITION,
+					ILLEGAL_REQUEST,
+					SCSI_ASC_INVALID_CDB,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		}
+	}
+
 	switch (opcode) {
 	case WRITE_6:
 	case READ_6:
-		nvme_trans_get_io_cdb6(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be24(&cmd[1]);
+		cdb_info.xfer_len = cmd[4];
+		if (cdb_info.xfer_len == 0)
+			cdb_info.xfer_len = 256;
 		break;
 	case WRITE_10:
 	case READ_10:
-		nvme_trans_get_io_cdb10(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be32(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
 		break;
 	case WRITE_12:
 	case READ_12:
-		nvme_trans_get_io_cdb12(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be32(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
 		break;
 	case WRITE_16:
 	case READ_16:
-		nvme_trans_get_io_cdb16(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be64(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
 		break;
 	default:
 		/* Will never really reach here */
-		res = SNTI_INTERNAL_ERROR;
+		res = -EIO;
 		goto out;
 	}
 
@@ -2236,7 +1817,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 
 	/* Send NVMe IO Command(s) */
 	res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
  out:
@@ -2246,17 +1827,18 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u8 evpd;
 	u8 page_code;
 	int alloc_len;
 	u8 *inq_response;
 
-	evpd = GET_INQ_EVPD_BIT(cmd);
-	page_code = GET_INQ_PAGE_CODE(cmd);
-	alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
+	evpd = cmd[1] & 0x01;
+	page_code = cmd[2];
+	alloc_len = get_unaligned_be16(&cmd[3]);
 
-	inq_response = kmalloc(alloc_len, GFP_KERNEL);
+	inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
+				GFP_KERNEL);
 	if (inq_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
@@ -2314,29 +1896,27 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u16 alloc_len;
-	u8 sp;
 	u8 pc;
 	u8 page_code;
 
-	sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET);
-	if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) {
+	if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET);
-	page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK;
-	pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
+
+	page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
+	pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
 	if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET);
+	alloc_len = get_unaligned_be16(&cmd[7]);
 	switch (page_code) {
 	case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
 		res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
@@ -2361,24 +1941,18 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	u8 cdb10 = 0;
 	u16 parm_list_len;
 	u8 page_format;
 	u8 save_pages;
 
-	page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET);
-	page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+	page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+	save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
 
-	save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET);
-	save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
-	if (GET_OPCODE(cmd) == MODE_SELECT) {
-		parm_list_len = GET_U8_FROM_CDB(cmd,
-				MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET);
+	if (cmd[0] == MODE_SELECT) {
+		parm_list_len = cmd[4];
 	} else {
-		parm_list_len = GET_U16_FROM_CDB(cmd,
-				MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET);
+		parm_list_len = cmd[7];
 		cdb10 = 1;
 	}
 
@@ -2387,42 +1961,36 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		 * According to SPC-4 r24, a paramter list length field of 0
 		 * shall not be considered an error
 		 */
-		res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
+		return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
 						page_format, save_pages, cdb10);
 	}
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u16 alloc_len;
 	u8 cdb10 = 0;
-	u8 page_code;
-	u8 pc;
 
-	if (GET_OPCODE(cmd) == MODE_SENSE) {
-		alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET);
+	if (cmd[0] == MODE_SENSE) {
+		alloc_len = cmd[4];
 	} else {
-		alloc_len = GET_U16_FROM_CDB(cmd,
-						MODE_SENSE10_ALLOC_LEN_OFFSET);
+		alloc_len = get_unaligned_be16(&cmd[7]);
 		cdb10 = 1;
 	}
 
-	pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) &
-						MODE_SENSE_PAGE_CONTROL_MASK;
-	if (pc != MODE_SENSE_PC_CURRENT_VALUES) {
+	if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
+			MODE_SENSE_PC_CURRENT_VALUES) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
 
-	page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) &
-					MODE_SENSE_PAGE_CODE_MASK;
-	switch (page_code) {
+	switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
 	case MODE_PAGE_CACHING:
 		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
 						cdb10,
@@ -2465,47 +2033,34 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 }
 
 static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
+							u8 *cmd, u8 cdb16)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
-	u32 alloc_len = READ_CAP_10_RESP_SIZE;
-	u32 resp_size = READ_CAP_10_RESP_SIZE;
+	u32 alloc_len;
+	u32 resp_size;
 	u32 xfer_len;
-	u8 cdb16;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 *response;
 
-	cdb16 = IS_READ_CAP_16(cmd);
 	if (cdb16) {
-		alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd);
+		alloc_len = get_unaligned_be32(&cmd[10]);
 		resp_size = READ_CAP_16_RESP_SIZE;
+	} else {
+		alloc_len = READ_CAP_10_RESP_SIZE;
+		resp_size = READ_CAP_10_RESP_SIZE;
 	}
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
-	id_ns = mem;
+		return res;	
 
 	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
-		goto out_dma;
+		goto out_free_id;
 	}
 	nvme_trans_fill_read_cap(response, id_ns, cdb16);
 
@@ -2513,72 +2068,53 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
 
 	kfree(response);
- out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
- out:
+ out_free_id:
+	kfree(id_ns);
 	return res;
 }
 
 static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	u32 alloc_len, xfer_len, resp_size;
-	u8 select_report;
 	u8 *response;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	u32 ll_length, lun_id;
 	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
 	__be32 tmp_len;
 
-	alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd);
-	select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET);
-
-	if ((select_report != ALL_LUNS_RETURNED) &&
-	    (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) &&
-	    (select_report != RESTRICTED_LUNS_RETURNED)) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+	switch (cmd[2]) {
+	default:
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	} else {
-		/* NVMe Controller Identify */
-		mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_id_ctrl),
-					&dma_addr, GFP_KERNEL);
-		if (mem == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-		nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	case ALL_LUNS_RETURNED:
+	case ALL_WELL_KNOWN_LUNS_RETURNED:
+	case RESTRICTED_LUNS_RETURNED:
+		nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_dma;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_dma;
-		}
-		id_ctrl = mem;
+			return res;
+
 		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
 		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
 
+		alloc_len = get_unaligned_be32(&cmd[6]);
 		if (alloc_len < resp_size) {
 			res = nvme_trans_completion(hdr,
 					SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_dma;
+			goto out_free_id;
 		}
 
 		response = kzalloc(resp_size, GFP_KERNEL);
 		if (response == NULL) {
 			res = -ENOMEM;
-			goto out_dma;
+			goto out_free_id;
 		}
 
 		/* The first LUN ID will always be 0 per the SAM spec */
@@ -2599,24 +2135,21 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
 
 	kfree(response);
- out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
-			  dma_addr);
- out:
+ out_free_id:
+	kfree(id_ctrl);
 	return res;
 }
 
 static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 alloc_len, xfer_len, resp_size;
 	u8 desc_format;
 	u8 *response;
 
-	alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd);
-	desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET);
-	desc_format &= REQUEST_SENSE_DESC_MASK;
+	desc_format = cmd[1] & 0x01;
+	alloc_len = cmd[4];
 
 	resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
 					(FIXED_FMT_SENSE_DATA_SIZE));
@@ -2626,7 +2159,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	}
 
-	if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
+	if (desc_format) {
 		/* Descriptor Format Sense Data */
 		response[0] = DESC_FORMAT_SENSE_DATA;
 		response[1] = NO_SENSE;
@@ -2665,95 +2198,58 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns,
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 }
 
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
+static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
+					struct sg_io_hdr *hdr)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
 	struct nvme_command c;
-	u8 immed, pcmod, pc, no_flush, start;
 
-	immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET);
-	pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET);
-	pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET);
-	no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET);
-	start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET);
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_cmd_flush;
+	c.common.nsid = cpu_to_le32(ns->ns_id);
 
-	immed &= START_STOP_UNIT_CDB_IMMED_MASK;
-	pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK;
-	pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT;
-	no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK;
-	start &= START_STOP_UNIT_CDB_START_MASK;
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+	return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+							u8 *cmd)
+{
+	u8 immed, pcmod, pc, no_flush, start;
+
+	immed = cmd[1] & 0x01;
+	pcmod = cmd[3] & 0x0f;
+	pc = (cmd[4] & 0xf0) >> 4;
+	no_flush = cmd[4] & 0x04;
+	start = cmd[4] & 0x01;
 
 	if (immed != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	} else {
 		if (no_flush == 0) {
 			/* Issue NVME FLUSH command prior to START STOP UNIT */
-			memset(&c, 0, sizeof(c));
-			c.common.opcode = nvme_cmd_flush;
-			c.common.nsid = cpu_to_le32(ns->ns_id);
-
-			nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
-			res = nvme_trans_status_code(hdr, nvme_sc);
+			int res = nvme_trans_synchronize_cache(ns, hdr);
 			if (res)
-				goto out;
-			if (nvme_sc) {
-				res = nvme_sc;
-				goto out;
-			}
+				return res;
 		}
 		/* Setup the expected power state transition */
-		res = nvme_trans_power_state(ns, hdr, pc, pcmod, start);
+		return nvme_trans_power_state(ns, hdr, pc, pcmod, start);
 	}
-
- out:
-	return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *cmd)
-{
-	int res = SNTI_TRANSLATION_SUCCESS;
-	int nvme_sc;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_cmd_flush;
-	c.common.nsid = cpu_to_le32(ns->ns_id);
-
-	nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
-
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out;
-	if (nvme_sc)
-		res = nvme_sc;
-
- out:
-	return res;
 }
 
 static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 parm_hdr_len = 0;
 	u8 nvme_pf_code = 0;
 	u8 format_prot_info, long_list, format_data;
 
-	format_prot_info = GET_U8_FROM_CDB(cmd,
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET);
-	long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET);
-	format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET);
-
-	format_prot_info = (format_prot_info &
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >>
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT;
-	long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK;
-	format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK;
+	format_prot_info = (cmd[1] & 0xc0) >> 6;
+	long_list = cmd[1] & 0x20;
+	format_data = cmd[1] & 0x10;
 
 	if (format_data != 0) {
 		if (format_prot_info != 0) {
@@ -2777,16 +2273,16 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (parm_hdr_len > 0) {
 		res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
 					format_prot_info, &nvme_pf_code);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out;
 	}
 
 	/* Attempt to activate any previously downloaded firmware image */
-	res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0);
+	res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
 
 	/* Determine Block size and count and send format command */
 	res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
 	res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
@@ -2799,28 +2295,24 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr,
 					u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	struct nvme_dev *dev = ns->dev;
 
 	if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
 					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	else
-		res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-
-	return res;
+		return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
 }
 
 static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u32 buffer_offset, parm_list_length;
 	u8 buffer_id, mode;
 
-	parm_list_length =
-		GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET);
+	parm_list_length = get_unaligned_be24(&cmd[6]);
 	if (parm_list_length % BYTES_TO_DWORDS != 0) {
 		/* NVMe expects Firmware file to be a whole number of DWORDS */
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2828,38 +2320,32 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET);
+	buffer_id = cmd[2];
 	if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) &
-						WRITE_BUFFER_CDB_MODE_MASK;
-	buffer_offset =
-		GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET);
+	mode = cmd[1] & 0x1f;
+	buffer_offset = get_unaligned_be24(&cmd[3]);
 
 	switch (mode) {
 	case DOWNLOAD_SAVE_ACTIVATE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
 						parm_list_length, buffer_offset,
 						buffer_id);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out;
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
+		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
 		break;
 	case DOWNLOAD_SAVE_DEFER_ACTIVATE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
 						parm_list_length, buffer_offset,
 						buffer_id);
 		break;
 	case ACTIVATE_DEFERRED_MICROCODE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
+		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
 		break;
 	default:
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2888,15 +2374,13 @@ struct scsi_unmap_parm_list {
 static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	struct nvme_dev *dev = ns->dev;
 	struct scsi_unmap_parm_list *plist;
 	struct nvme_dsm_range *range;
 	struct nvme_command c;
-	int i, nvme_sc, res = -ENOMEM;
+	int i, nvme_sc, res;
 	u16 ndesc, list_len;
-	dma_addr_t dma_addr;
 
-	list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET);
+	list_len = get_unaligned_be16(&cmd[7]);
 	if (!list_len)
 		return -EINVAL;
 
@@ -2905,7 +2389,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		return -ENOMEM;
 
 	res = nvme_trans_copy_from_user(hdr, plist, list_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
 	ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
@@ -2914,10 +2398,11 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	}
 
-	range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
-							&dma_addr, GFP_KERNEL);
-	if (!range)
+	range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
+	if (!range) {
+		res = -ENOMEM;
 		goto out;
+	}
 
 	for (i = 0; i < ndesc; i++) {
 		range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
@@ -2928,15 +2413,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	memset(&c, 0, sizeof(c));
 	c.dsm.opcode = nvme_cmd_dsm;
 	c.dsm.nsid = cpu_to_le32(ns->ns_id);
-	c.dsm.prp1 = cpu_to_le64(dma_addr);
 	c.dsm.nr = cpu_to_le32(ndesc - 1);
 	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
+			ndesc * sizeof(*range));
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
-	dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
-							range, dma_addr);
+	kfree(range);
  out:
 	kfree(plist);
 	return res;
@@ -2991,13 +2475,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 		retcode = nvme_trans_mode_sense(ns, hdr, cmd);
 		break;
 	case READ_CAPACITY:
-		retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+		retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
 		break;
 	case SERVICE_ACTION_IN_16:
-		if (IS_READ_CAP_16(cmd))
-			retcode = nvme_trans_read_capacity(ns, hdr, cmd);
-		else
+		switch (cmd[1]) {
+		case SAI_READ_CAPACITY_16:
+			retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
+			break;
+		default:
 			goto out;
+		}
 		break;
 	case REPORT_LUNS:
 		retcode = nvme_trans_report_luns(ns, hdr, cmd);
@@ -3013,7 +2500,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 		retcode = nvme_trans_start_stop(ns, hdr, cmd);
 		break;
 	case SYNCHRONIZE_CACHE:
-		retcode = nvme_trans_synchronize_cache(ns, hdr, cmd);
+		retcode = nvme_trans_synchronize_cache(ns, hdr);
 		break;
 	case FORMAT_UNIT:
 		retcode = nvme_trans_format_unit(ns, hdr, cmd);
@@ -3051,15 +2538,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
 	if (hdr.cmd_len > BLK_MAX_CDB)
 		return -EINVAL;
 
+	/*
+	 * A positive return code means a NVMe status, which has been
+	 * translated to sense data.
+	 */
 	retcode = nvme_scsi_translate(ns, &hdr);
 	if (retcode < 0)
 		return retcode;
-	if (retcode > 0)
-		retcode = SNTI_TRANSLATION_SUCCESS;
 	if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
 		return -EFAULT;
-
-	return retcode;
+	return 0;
 }
 
 int nvme_sg_get_version_num(int __user *ip)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d48715b287e6..dbb4da1cdca8 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -442,7 +442,7 @@ static char *pd_buf;		/* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-	if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
+	if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) {
 		phase = pd_special;
 		return pd_special();
 	}
@@ -725,7 +725,7 @@ static int pd_special_command(struct pd_unit *disk,
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
-	rq->cmd_type = REQ_TYPE_SPECIAL;
+	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->special = func;
 
 	err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 09e628dafd9d..4c20c228184c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -61,6 +61,7 @@
 #include <linux/freezer.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/backing-dev.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi.h>
diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c
index eabf4a8d0085..095dfaadcaa5 100644
--- a/drivers/block/pmem.c
+++ b/drivers/block/pmem.c
@@ -139,11 +139,11 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
 	}
 
 	/*
-	 * Map the memory as non-cachable, as we can't write back the contents
+	 * Map the memory as write-through, as we can't write back the contents
 	 * of the CPU caches in case of a crash.
 	 */
 	err = -ENOMEM;
-	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+	pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size);
 	if (!pmem->virt_addr)
 		goto out_release_region;
 
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index ef45cfb98fd2..b1612eb16172 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -1,5 +1,5 @@
 /*
- * ps3vram - Use extra PS3 video ram as MTD block device.
+ * ps3vram - Use extra PS3 video ram as block device.
  *
  * Copyright 2009 Sony Corporation
  *
@@ -73,8 +73,8 @@ struct ps3vram_priv {
 
 	u64 memory_handle;
 	u64 context_handle;
-	u32 *ctrl;
-	void *reports;
+	u32 __iomem *ctrl;
+	void __iomem *reports;
 	u8 *xdr_buf;
 
 	u32 *fifo_base;
@@ -104,7 +104,7 @@ static char *size = "256M";
 module_param(size, charp, 0);
 MODULE_PARM_DESC(size, "memory size");
 
-static u32 *ps3vram_get_notifier(void *reports, int notifier)
+static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier)
 {
 	return reports + DMA_NOTIFIER_OFFSET_BASE +
 	       DMA_NOTIFIER_SIZE * notifier;
@@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier)
 static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
-	u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+	u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
 	int i;
 
 	for (i = 0; i < 4; i++)
-		notify[i] = 0xffffffff;
+		iowrite32be(0xffffffff, notify + i);
 }
 
 static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
 				 unsigned int timeout_ms)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
-	u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+	u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
 	unsigned long timeout;
 
 	for (timeout = 20; timeout; timeout--) {
-		if (!notify[3])
+		if (!ioread32be(notify + 3))
 			return 0;
 		udelay(10);
 	}
@@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
 	timeout = jiffies + msecs_to_jiffies(timeout_ms);
 
 	do {
-		if (!notify[3])
+		if (!ioread32be(notify + 3))
 			return 0;
 		msleep(1);
 	} while (time_before(jiffies, timeout));
@@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
-	priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET);
 }
 
 static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
@@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
 	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
 
 	do {
-		if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+		if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET))
 			return 0;
 		msleep(1);
 	} while (time_before(jiffies, timeout));
 
 	dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
-		 priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
-		 priv->ctrl[CTRL_TOP]);
+		 ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET),
+		 ioread32be(priv->ctrl + CTRL_TOP));
 
 	return -ETIMEDOUT;
 }
@@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
 
 	ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
 
 	/* asking the HV for a blit will kick the FIFO */
 	status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
@@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
 
 	mutex_lock(&ps3_gpu_mutex);
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
-			       (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+	iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base)
+		* sizeof(u32), priv->ctrl + CTRL_PUT);
 
 	/* asking the HV for a blit will kick the FIFO */
 	status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 812523330a78..ec6c5c6e1ac9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2264,6 +2264,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 			result, xferred);
 		if (!img_request->result)
 			img_request->result = result;
+		/*
+		 * Need to end I/O on the entire obj_request worth of
+		 * bytes in case of error.
+		 */
+		xferred = obj_request->length;
 	}
 
 	/* Image object requests don't own their page array */
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 5d552857de41..59c91d49b14b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
 	spin_unlock_irq(&host->lock);
 
 	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
-	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	crq->rq->special = crq;
 	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
@@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
 	crq->msg_bucket = (u32) rc;
 
 	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
-	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	crq->rq->special = crq;
 	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5ea2f0bbbc7c..d4d05f064d39 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req)
 		req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
 		req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
 		req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
-	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+	} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
 		req->errors = (error != 0);
 	}
 
@@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 			vbr->out_hdr.sector = 0;
 			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
 			break;
-		case REQ_TYPE_SPECIAL:
+		case REQ_TYPE_DRV_PRIV:
 			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
 			vbr->out_hdr.sector = 0;
 			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
@@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		return PTR_ERR(req);
 	}
 
-	req->cmd_type = REQ_TYPE_SPECIAL;
+	req->cmd_type = REQ_TYPE_DRV_PRIV;
 	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
 	blk_put_request(req);
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index bd2b3bbbb22c..713fc9ff1149 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -265,17 +265,6 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
 	atomic_dec(&blkif->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts_unmap_callback(int result,
-						struct gntab_unmap_queue_data *data)
-{
-	struct completion *c = data->data;
-
-	/* BUG_ON used to reproduce existing behaviour,
-	   but is this the best way to deal with this? */
-	BUG_ON(result);
-	complete(c);
-}
-
 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                                  unsigned int num)
 {
@@ -285,12 +274,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 	struct rb_node *n;
 	int segs_to_unmap = 0;
 	struct gntab_unmap_queue_data unmap_data;
-	struct completion unmap_completion;
 
-	init_completion(&unmap_completion);
-
-	unmap_data.data = &unmap_completion;
-	unmap_data.done = &free_persistent_gnts_unmap_callback;
 	unmap_data.pages = pages;
 	unmap_data.unmap_ops = unmap;
 	unmap_data.kunmap_ops = NULL;
@@ -310,8 +294,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 			!rb_next(&persistent_gnt->node)) {
 
 			unmap_data.count = segs_to_unmap;
-			gnttab_unmap_refs_async(&unmap_data);
-			wait_for_completion(&unmap_completion);
+			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
 			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
@@ -329,8 +312,13 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct persistent_gnt *persistent_gnt;
-	int ret, segs_to_unmap = 0;
+	int segs_to_unmap = 0;
 	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+	struct gntab_unmap_queue_data unmap_data;
+
+	unmap_data.pages = pages;
+	unmap_data.unmap_ops = unmap;
+	unmap_data.kunmap_ops = NULL;
 
 	while(!list_empty(&blkif->persistent_purge_list)) {
 		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
@@ -346,17 +334,16 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 		pages[segs_to_unmap] = persistent_gnt->page;
 
 		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
-			ret = gnttab_unmap_refs(unmap, NULL, pages,
-				segs_to_unmap);
-			BUG_ON(ret);
+			unmap_data.count = segs_to_unmap;
+			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
 		kfree(persistent_gnt);
 	}
 	if (segs_to_unmap > 0) {
-		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
-		BUG_ON(ret);
+		unmap_data.count = segs_to_unmap;
+		BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 		put_free_pages(blkif, pages, segs_to_unmap);
 	}
 }
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 6489c0fd0ea6..386ba3d1a6ee 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -23,12 +23,4 @@ config ZRAM_LZ4_COMPRESS
 	default n
 	help
 	  This option enables LZ4 compression algorithm support. Compression
-	  algorithm can be changed using `comp_algorithm' device attribute.
-
-config ZRAM_DEBUG
-	bool "Compressed RAM block device debug support"
-	depends on ZRAM
-	default n
-	help
-	  This option adds additional debugging code to the compressed
-	  RAM block device driver.
+	  algorithm can be changed using `comp_algorithm' device attribute.
+\ No newline at end of file
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index f1ff39a3d1c1..965d1afb0eaa 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -274,7 +274,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
 	int i = 0;
 
 	while (backends[i]) {
-		if (sysfs_streq(comp, backends[i]->name))
+		if (!strcmp(comp, backends[i]->name))
 			sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
 					"[%s] ", backends[i]->name);
 		else
@@ -286,6 +286,11 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
 	return sz;
 }
 
+bool zcomp_available_algorithm(const char *comp)
+{
+	return find_backend(comp) != NULL;
+}
+
 bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
 {
 	return comp->set_max_streams(comp, num_strm);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index c59d1fca72c0..46e2b9f8f1f0 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -51,6 +51,7 @@ struct zcomp {
 };
 
 ssize_t zcomp_available_show(const char *comp, char *buf);
+bool zcomp_available_algorithm(const char *comp);
 
 struct zcomp *zcomp_create(const char *comp, int max_strm);
 void zcomp_destroy(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c94386aa563d..fb655e8d1e3b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -15,10 +15,6 @@
 #define KMSG_COMPONENT "zram"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#ifdef CONFIG_ZRAM_DEBUG
-#define DEBUG
-#endif
-
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/bio.h>
@@ -32,12 +28,16 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/sysfs.h>
 
 #include "zram_drv.h"
 
-/* Globals */
+static DEFINE_IDR(zram_index_idr);
+/* idr index must be protected */
+static DEFINE_MUTEX(zram_index_mutex);
+
 static int zram_major;
-static struct zram *zram_devices;
 static const char *default_compressor = "lzo";
 
 /* Module params (documentation at end) */
@@ -53,7 +53,7 @@ static inline void deprecated_attr_warn(const char *name)
 }
 
 #define ZRAM_ATTR_RO(name)						\
-static ssize_t name##_show(struct device *d,		\
+static ssize_t name##_show(struct device *d,				\
 				struct device_attribute *attr, char *b)	\
 {									\
 	struct zram *zram = dev_to_zram(d);				\
@@ -74,12 +74,117 @@ static inline struct zram *dev_to_zram(struct device *dev)
 	return (struct zram *)dev_to_disk(dev)->private_data;
 }
 
-static ssize_t disksize_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+/* flag operations require table entry bit_spin_lock() being held */
+static int zram_test_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
 {
-	struct zram *zram = dev_to_zram(dev);
+	return meta->table[index].value & BIT(flag);
+}
 
-	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+static void zram_set_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
+{
+	meta->table[index].value |= BIT(flag);
+}
+
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
+{
+	meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+					u32 index, size_t size)
+{
+	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+	return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline int valid_io_request(struct zram *zram,
+		sector_t start, unsigned int size)
+{
+	u64 end, bound;
+
+	/* unaligned request */
+	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+		return 0;
+	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+		return 0;
+
+	end = start + (size >> SECTOR_SHIFT);
+	bound = zram->disksize >> SECTOR_SHIFT;
+	/* out of range range */
+	if (unlikely(start >= bound || end > bound || start > end))
+		return 0;
+
+	/* I/O request is valid */
+	return 1;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+	if (*offset + bvec->bv_len >= PAGE_SIZE)
+		(*index)++;
+	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
+static inline void update_used_max(struct zram *zram,
+					const unsigned long pages)
+{
+	unsigned long old_max, cur_max;
+
+	old_max = atomic_long_read(&zram->stats.max_used_pages);
+
+	do {
+		cur_max = old_max;
+		if (pages > cur_max)
+			old_max = atomic_long_cmpxchg(
+				&zram->stats.max_used_pages, cur_max, pages);
+	} while (old_max != cur_max);
+}
+
+static int page_zero_filled(void *ptr)
+{
+	unsigned int pos;
+	unsigned long *page;
+
+	page = (unsigned long *)ptr;
+
+	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
+		if (page[pos])
+			return 0;
+	}
+
+	return 1;
+}
+
+static void handle_zero_page(struct bio_vec *bvec)
+{
+	struct page *page = bvec->bv_page;
+	void *user_mem;
+
+	user_mem = kmap_atomic(page);
+	if (is_partial_io(bvec))
+		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+	else
+		clear_page(user_mem);
+	kunmap_atomic(user_mem);
+
+	flush_dcache_page(page);
 }
 
 static ssize_t initstate_show(struct device *dev,
@@ -95,6 +200,14 @@ static ssize_t initstate_show(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
 }
 
+static ssize_t disksize_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+}
+
 static ssize_t orig_data_size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -122,19 +235,6 @@ static ssize_t mem_used_total_show(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
 }
 
-static ssize_t max_comp_streams_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	int val;
-	struct zram *zram = dev_to_zram(dev);
-
-	down_read(&zram->init_lock);
-	val = zram->max_comp_streams;
-	up_read(&zram->init_lock);
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
-}
-
 static ssize_t mem_limit_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -204,6 +304,19 @@ static ssize_t mem_used_max_store(struct device *dev,
 	return len;
 }
 
+static ssize_t max_comp_streams_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int val;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	val = zram->max_comp_streams;
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
 static ssize_t max_comp_streams_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -250,6 +363,8 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
+	size_t sz;
+
 	down_write(&zram->init_lock);
 	if (init_done(zram)) {
 		up_write(&zram->init_lock);
@@ -257,69 +372,108 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		return -EBUSY;
 	}
 	strlcpy(zram->compressor, buf, sizeof(zram->compressor));
+
+	/* ignore trailing newline */
+	sz = strlen(zram->compressor);
+	if (sz > 0 && zram->compressor[sz - 1] == '\n')
+		zram->compressor[sz - 1] = 0x00;
+
+	if (!zcomp_available_algorithm(zram->compressor))
+		len = -EINVAL;
+
 	up_write(&zram->init_lock);
 	return len;
 }
 
-/* flag operations needs meta->tb_lock */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
-			enum zram_pageflags flag)
+static ssize_t compact_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
 {
-	return meta->table[index].value & BIT(flag);
-}
+	unsigned long nr_migrated;
+	struct zram *zram = dev_to_zram(dev);
+	struct zram_meta *meta;
 
-static void zram_set_flag(struct zram_meta *meta, u32 index,
-			enum zram_pageflags flag)
-{
-	meta->table[index].value |= BIT(flag);
-}
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		up_read(&zram->init_lock);
+		return -EINVAL;
+	}
 
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
-			enum zram_pageflags flag)
-{
-	meta->table[index].value &= ~BIT(flag);
+	meta = zram->meta;
+	nr_migrated = zs_compact(meta->mem_pool);
+	atomic64_add(nr_migrated, &zram->stats.num_migrated);
+	up_read(&zram->init_lock);
+
+	return len;
 }
 
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static ssize_t io_stat_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
-	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+	struct zram *zram = dev_to_zram(dev);
+	ssize_t ret;
+
+	down_read(&zram->init_lock);
+	ret = scnprintf(buf, PAGE_SIZE,
+			"%8llu %8llu %8llu %8llu\n",
+			(u64)atomic64_read(&zram->stats.failed_reads),
+			(u64)atomic64_read(&zram->stats.failed_writes),
+			(u64)atomic64_read(&zram->stats.invalid_io),
+			(u64)atomic64_read(&zram->stats.notify_free));
+	up_read(&zram->init_lock);
+
+	return ret;
 }
 
-static void zram_set_obj_size(struct zram_meta *meta,
-					u32 index, size_t size)
+static ssize_t mm_stat_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
-	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+	struct zram *zram = dev_to_zram(dev);
+	u64 orig_size, mem_used = 0;
+	long max_used;
+	ssize_t ret;
 
-	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+	down_read(&zram->init_lock);
+	if (init_done(zram))
+		mem_used = zs_get_total_pages(zram->meta->mem_pool);
+
+	orig_size = atomic64_read(&zram->stats.pages_stored);
+	max_used = atomic_long_read(&zram->stats.max_used_pages);
+
+	ret = scnprintf(buf, PAGE_SIZE,
+			"%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
+			orig_size << PAGE_SHIFT,
+			(u64)atomic64_read(&zram->stats.compr_data_size),
+			mem_used << PAGE_SHIFT,
+			zram->limit_pages << PAGE_SHIFT,
+			max_used << PAGE_SHIFT,
+			(u64)atomic64_read(&zram->stats.zero_pages),
+			(u64)atomic64_read(&zram->stats.num_migrated));
+	up_read(&zram->init_lock);
+
+	return ret;
 }
 
-static inline int is_partial_io(struct bio_vec *bvec)
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+ZRAM_ATTR_RO(num_reads);
+ZRAM_ATTR_RO(num_writes);
+ZRAM_ATTR_RO(failed_reads);
+ZRAM_ATTR_RO(failed_writes);
+ZRAM_ATTR_RO(invalid_io);
+ZRAM_ATTR_RO(notify_free);
+ZRAM_ATTR_RO(zero_pages);
+ZRAM_ATTR_RO(compr_data_size);
+
+static inline bool zram_meta_get(struct zram *zram)
 {
-	return bvec->bv_len != PAGE_SIZE;
+	if (atomic_inc_not_zero(&zram->refcount))
+		return true;
+	return false;
 }
 
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram,
-		sector_t start, unsigned int size)
+static inline void zram_meta_put(struct zram *zram)
 {
-	u64 end, bound;
-
-	/* unaligned request */
-	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
-		return 0;
-	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
-		return 0;
-
-	end = start + (size >> SECTOR_SHIFT);
-	bound = zram->disksize >> SECTOR_SHIFT;
-	/* out of range range */
-	if (unlikely(start >= bound || end > bound || start > end))
-		return 0;
-
-	/* I/O request is valid */
-	return 1;
+	atomic_dec(&zram->refcount);
 }
 
 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
@@ -373,56 +527,6 @@ out_error:
 	return NULL;
 }
 
-static inline bool zram_meta_get(struct zram *zram)
-{
-	if (atomic_inc_not_zero(&zram->refcount))
-		return true;
-	return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
-	atomic_dec(&zram->refcount);
-}
-
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
-	if (*offset + bvec->bv_len >= PAGE_SIZE)
-		(*index)++;
-	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
-static int page_zero_filled(void *ptr)
-{
-	unsigned int pos;
-	unsigned long *page;
-
-	page = (unsigned long *)ptr;
-
-	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
-		if (page[pos])
-			return 0;
-	}
-
-	return 1;
-}
-
-static void handle_zero_page(struct bio_vec *bvec)
-{
-	struct page *page = bvec->bv_page;
-	void *user_mem;
-
-	user_mem = kmap_atomic(page);
-	if (is_partial_io(bvec))
-		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
-	else
-		clear_page(user_mem);
-	kunmap_atomic(user_mem);
-
-	flush_dcache_page(page);
-}
-
-
 /*
  * To protect concurrent access to the same index entry,
  * caller should hold this table index entry's bit_spinlock to
@@ -540,21 +644,6 @@ out_cleanup:
 	return ret;
 }
 
-static inline void update_used_max(struct zram *zram,
-					const unsigned long pages)
-{
-	unsigned long old_max, cur_max;
-
-	old_max = atomic_long_read(&zram->stats.max_used_pages);
-
-	do {
-		cur_max = old_max;
-		if (pages > cur_max)
-			old_max = atomic_long_cmpxchg(
-				&zram->stats.max_used_pages, cur_max, pages);
-	} while (old_max != cur_max);
-}
-
 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 			   int offset)
 {
@@ -564,8 +653,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	struct page *page;
 	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
 	struct zram_meta *meta = zram->meta;
-	struct zcomp_strm *zstrm;
-	bool locked = false;
+	struct zcomp_strm *zstrm = NULL;
 	unsigned long alloced_pages;
 
 	page = bvec->bv_page;
@@ -585,7 +673,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	}
 
 	zstrm = zcomp_strm_find(zram->comp);
-	locked = true;
 	user_mem = kmap_atomic(page);
 
 	if (is_partial_io(bvec)) {
@@ -657,7 +744,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	}
 
 	zcomp_strm_release(zram->comp, zstrm);
-	locked = false;
+	zstrm = NULL;
 	zs_unmap_object(meta->mem_pool, handle);
 
 	/*
@@ -675,42 +762,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	atomic64_add(clen, &zram->stats.compr_data_size);
 	atomic64_inc(&zram->stats.pages_stored);
 out:
-	if (locked)
+	if (zstrm)
 		zcomp_strm_release(zram->comp, zstrm);
 	if (is_partial_io(bvec))
 		kfree(uncmem);
 	return ret;
 }
 
-static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
-			int offset, int rw)
-{
-	unsigned long start_time = jiffies;
-	int ret;
-
-	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
-			&zram->disk->part0);
-
-	if (rw == READ) {
-		atomic64_inc(&zram->stats.num_reads);
-		ret = zram_bvec_read(zram, bvec, index, offset);
-	} else {
-		atomic64_inc(&zram->stats.num_writes);
-		ret = zram_bvec_write(zram, bvec, index, offset);
-	}
-
-	generic_end_io_acct(rw, &zram->disk->part0, start_time);
-
-	if (unlikely(ret)) {
-		if (rw == READ)
-			atomic64_inc(&zram->stats.failed_reads);
-		else
-			atomic64_inc(&zram->stats.failed_writes);
-	}
-
-	return ret;
-}
-
 /*
  * zram_bio_discard - handler on discard request
  * @index: physical block index in PAGE_SIZE units
@@ -750,149 +808,32 @@ static void zram_bio_discard(struct zram *zram, u32 index,
 	}
 }
 
-static void zram_reset_device(struct zram *zram)
-{
-	struct zram_meta *meta;
-	struct zcomp *comp;
-	u64 disksize;
-
-	down_write(&zram->init_lock);
-
-	zram->limit_pages = 0;
-
-	if (!init_done(zram)) {
-		up_write(&zram->init_lock);
-		return;
-	}
-
-	meta = zram->meta;
-	comp = zram->comp;
-	disksize = zram->disksize;
-	/*
-	 * Refcount will go down to 0 eventually and r/w handler
-	 * cannot handle further I/O so it will bail out by
-	 * check zram_meta_get.
-	 */
-	zram_meta_put(zram);
-	/*
-	 * We want to free zram_meta in process context to avoid
-	 * deadlock between reclaim path and any other locks.
-	 */
-	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
-
-	/* Reset stats */
-	memset(&zram->stats, 0, sizeof(zram->stats));
-	zram->disksize = 0;
-	zram->max_comp_streams = 1;
-	set_capacity(zram->disk, 0);
-
-	up_write(&zram->init_lock);
-	/* I/O operation under all of CPU are done so let's free */
-	zram_meta_free(meta, disksize);
-	zcomp_destroy(comp);
-}
-
-static ssize_t disksize_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
-{
-	u64 disksize;
-	struct zcomp *comp;
-	struct zram_meta *meta;
-	struct zram *zram = dev_to_zram(dev);
-	int err;
-
-	disksize = memparse(buf, NULL);
-	if (!disksize)
-		return -EINVAL;
-
-	disksize = PAGE_ALIGN(disksize);
-	meta = zram_meta_alloc(zram->disk->first_minor, disksize);
-	if (!meta)
-		return -ENOMEM;
-
-	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
-	if (IS_ERR(comp)) {
-		pr_info("Cannot initialise %s compressing backend\n",
-				zram->compressor);
-		err = PTR_ERR(comp);
-		goto out_free_meta;
-	}
-
-	down_write(&zram->init_lock);
-	if (init_done(zram)) {
-		pr_info("Cannot change disksize for initialized device\n");
-		err = -EBUSY;
-		goto out_destroy_comp;
-	}
-
-	init_waitqueue_head(&zram->io_done);
-	atomic_set(&zram->refcount, 1);
-	zram->meta = meta;
-	zram->comp = comp;
-	zram->disksize = disksize;
-	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
-	up_write(&zram->init_lock);
-
-	/*
-	 * Revalidate disk out of the init_lock to avoid lockdep splat.
-	 * It's okay because disk's capacity is protected by init_lock
-	 * so that revalidate_disk always sees up-to-date capacity.
-	 */
-	revalidate_disk(zram->disk);
-
-	return len;
-
-out_destroy_comp:
-	up_write(&zram->init_lock);
-	zcomp_destroy(comp);
-out_free_meta:
-	zram_meta_free(meta, disksize);
-	return err;
-}
-
-static ssize_t reset_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+			int offset, int rw)
 {
+	unsigned long start_time = jiffies;
 	int ret;
-	unsigned short do_reset;
-	struct zram *zram;
-	struct block_device *bdev;
 
-	zram = dev_to_zram(dev);
-	bdev = bdget_disk(zram->disk, 0);
-
-	if (!bdev)
-		return -ENOMEM;
+	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
+			&zram->disk->part0);
 
-	mutex_lock(&bdev->bd_mutex);
-	/* Do not reset an active device! */
-	if (bdev->bd_openers) {
-		ret = -EBUSY;
-		goto out;
+	if (rw == READ) {
+		atomic64_inc(&zram->stats.num_reads);
+		ret = zram_bvec_read(zram, bvec, index, offset);
+	} else {
+		atomic64_inc(&zram->stats.num_writes);
+		ret = zram_bvec_write(zram, bvec, index, offset);
 	}
 
-	ret = kstrtou16(buf, 10, &do_reset);
-	if (ret)
-		goto out;
+	generic_end_io_acct(rw, &zram->disk->part0, start_time);
 
-	if (!do_reset) {
-		ret = -EINVAL;
-		goto out;
+	if (unlikely(ret)) {
+		if (rw == READ)
+			atomic64_inc(&zram->stats.failed_reads);
+		else
+			atomic64_inc(&zram->stats.failed_writes);
 	}
 
-	/* Make sure all pending I/O is finished */
-	fsync_bdev(bdev);
-	zram_reset_device(zram);
-
-	mutex_unlock(&bdev->bd_mutex);
-	revalidate_disk(zram->disk);
-	bdput(bdev);
-
-	return len;
-
-out:
-	mutex_unlock(&bdev->bd_mutex);
-	bdput(bdev);
 	return ret;
 }
 
@@ -1032,79 +973,185 @@ out:
 	return err;
 }
 
-static const struct block_device_operations zram_devops = {
-	.swap_slot_free_notify = zram_slot_free_notify,
-	.rw_page = zram_rw_page,
-	.owner = THIS_MODULE
-};
+static void zram_reset_device(struct zram *zram)
+{
+	struct zram_meta *meta;
+	struct zcomp *comp;
+	u64 disksize;
 
-static DEVICE_ATTR_RW(disksize);
-static DEVICE_ATTR_RO(initstate);
-static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
-static DEVICE_ATTR_RW(max_comp_streams);
-static DEVICE_ATTR_RW(comp_algorithm);
+	down_write(&zram->init_lock);
 
-static ssize_t io_stat_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+	zram->limit_pages = 0;
+
+	if (!init_done(zram)) {
+		up_write(&zram->init_lock);
+		return;
+	}
+
+	meta = zram->meta;
+	comp = zram->comp;
+	disksize = zram->disksize;
+	/*
+	 * Refcount will go down to 0 eventually and r/w handler
+	 * cannot handle further I/O so it will bail out by
+	 * check zram_meta_get.
+	 */
+	zram_meta_put(zram);
+	/*
+	 * We want to free zram_meta in process context to avoid
+	 * deadlock between reclaim path and any other locks.
+	 */
+	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
+
+	/* Reset stats */
+	memset(&zram->stats, 0, sizeof(zram->stats));
+	zram->disksize = 0;
+	zram->max_comp_streams = 1;
+
+	set_capacity(zram->disk, 0);
+	part_stat_set_all(&zram->disk->part0, 0);
+
+	up_write(&zram->init_lock);
+	/* I/O operation under all of CPU are done so let's free */
+	zram_meta_free(meta, disksize);
+	zcomp_destroy(comp);
+}
+
+static ssize_t disksize_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
 {
+	u64 disksize;
+	struct zcomp *comp;
+	struct zram_meta *meta;
 	struct zram *zram = dev_to_zram(dev);
-	ssize_t ret;
+	int err;
 
-	down_read(&zram->init_lock);
-	ret = scnprintf(buf, PAGE_SIZE,
-			"%8llu %8llu %8llu %8llu\n",
-			(u64)atomic64_read(&zram->stats.failed_reads),
-			(u64)atomic64_read(&zram->stats.failed_writes),
-			(u64)atomic64_read(&zram->stats.invalid_io),
-			(u64)atomic64_read(&zram->stats.notify_free));
-	up_read(&zram->init_lock);
+	disksize = memparse(buf, NULL);
+	if (!disksize)
+		return -EINVAL;
 
-	return ret;
+	disksize = PAGE_ALIGN(disksize);
+	meta = zram_meta_alloc(zram->disk->first_minor, disksize);
+	if (!meta)
+		return -ENOMEM;
+
+	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+	if (IS_ERR(comp)) {
+		pr_info("Cannot initialise %s compressing backend\n",
+				zram->compressor);
+		err = PTR_ERR(comp);
+		goto out_free_meta;
+	}
+
+	down_write(&zram->init_lock);
+	if (init_done(zram)) {
+		pr_info("Cannot change disksize for initialized device\n");
+		err = -EBUSY;
+		goto out_destroy_comp;
+	}
+
+	init_waitqueue_head(&zram->io_done);
+	atomic_set(&zram->refcount, 1);
+	zram->meta = meta;
+	zram->comp = comp;
+	zram->disksize = disksize;
+	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+	up_write(&zram->init_lock);
+
+	/*
+	 * Revalidate disk out of the init_lock to avoid lockdep splat.
+	 * It's okay because disk's capacity is protected by init_lock
+	 * so that revalidate_disk always sees up-to-date capacity.
+	 */
+	revalidate_disk(zram->disk);
+
+	return len;
+
+out_destroy_comp:
+	up_write(&zram->init_lock);
+	zcomp_destroy(comp);
+out_free_meta:
+	zram_meta_free(meta, disksize);
+	return err;
 }
 
-static ssize_t mm_stat_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t reset_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
 {
-	struct zram *zram = dev_to_zram(dev);
-	u64 orig_size, mem_used = 0;
-	long max_used;
-	ssize_t ret;
+	int ret;
+	unsigned short do_reset;
+	struct zram *zram;
+	struct block_device *bdev;
 
-	down_read(&zram->init_lock);
-	if (init_done(zram))
-		mem_used = zs_get_total_pages(zram->meta->mem_pool);
+	ret = kstrtou16(buf, 10, &do_reset);
+	if (ret)
+		return ret;
 
-	orig_size = atomic64_read(&zram->stats.pages_stored);
-	max_used = atomic_long_read(&zram->stats.max_used_pages);
+	if (!do_reset)
+		return -EINVAL;
 
-	ret = scnprintf(buf, PAGE_SIZE,
-			"%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
-			orig_size << PAGE_SHIFT,
-			(u64)atomic64_read(&zram->stats.compr_data_size),
-			mem_used << PAGE_SHIFT,
-			zram->limit_pages << PAGE_SHIFT,
-			max_used << PAGE_SHIFT,
-			(u64)atomic64_read(&zram->stats.zero_pages),
-			(u64)atomic64_read(&zram->stats.num_migrated));
-	up_read(&zram->init_lock);
+	zram = dev_to_zram(dev);
+	bdev = bdget_disk(zram->disk, 0);
+	if (!bdev)
+		return -ENOMEM;
+
+	mutex_lock(&bdev->bd_mutex);
+	/* Do not reset an active device or claimed device */
+	if (bdev->bd_openers || zram->claim) {
+		mutex_unlock(&bdev->bd_mutex);
+		bdput(bdev);
+		return -EBUSY;
+	}
+
+	/* From now on, anyone can't open /dev/zram[0-9] */
+	zram->claim = true;
+	mutex_unlock(&bdev->bd_mutex);
+
+	/* Make sure all the pending I/O are finished */
+	fsync_bdev(bdev);
+	zram_reset_device(zram);
+	revalidate_disk(zram->disk);
+	bdput(bdev);
+
+	mutex_lock(&bdev->bd_mutex);
+	zram->claim = false;
+	mutex_unlock(&bdev->bd_mutex);
+
+	return len;
+}
+
+static int zram_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret = 0;
+	struct zram *zram;
+
+	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+
+	zram = bdev->bd_disk->private_data;
+	/* zram was claimed to reset so open request fails */
+	if (zram->claim)
+		ret = -EBUSY;
 
 	return ret;
 }
 
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
+static const struct block_device_operations zram_devops = {
+	.open = zram_open,
+	.swap_slot_free_notify = zram_slot_free_notify,
+	.rw_page = zram_rw_page,
+	.owner = THIS_MODULE
+};
+
+static DEVICE_ATTR_WO(compact);
+static DEVICE_ATTR_RW(disksize);
+static DEVICE_ATTR_RO(initstate);
+static DEVICE_ATTR_WO(reset);
+static DEVICE_ATTR_RO(orig_data_size);
+static DEVICE_ATTR_RO(mem_used_total);
+static DEVICE_ATTR_RW(mem_limit);
+static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_RW(max_comp_streams);
+static DEVICE_ATTR_RW(comp_algorithm);
 
 static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_disksize.attr,
@@ -1114,6 +1161,7 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_num_writes.attr,
 	&dev_attr_failed_reads.attr,
 	&dev_attr_failed_writes.attr,
+	&dev_attr_compact.attr,
 	&dev_attr_invalid_io.attr,
 	&dev_attr_notify_free.attr,
 	&dev_attr_zero_pages.attr,
@@ -1133,10 +1181,24 @@ static struct attribute_group zram_disk_attr_group = {
 	.attrs = zram_disk_attrs,
 };
 
-static int create_device(struct zram *zram, int device_id)
+/*
+ * Allocate and initialize new zram device. the function returns
+ * '>= 0' device_id upon success, and negative value otherwise.
+ */
+static int zram_add(void)
 {
+	struct zram *zram;
 	struct request_queue *queue;
-	int ret = -ENOMEM;
+	int ret, device_id;
+
+	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
+	if (!zram)
+		return -ENOMEM;
+
+	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto out_free_dev;
+	device_id = ret;
 
 	init_rwsem(&zram->init_lock);
 
@@ -1144,12 +1206,13 @@ static int create_device(struct zram *zram, int device_id)
 	if (!queue) {
 		pr_err("Error allocating disk queue for device %d\n",
 			device_id);
-		goto out;
+		ret = -ENOMEM;
+		goto out_free_idr;
 	}
 
 	blk_queue_make_request(queue, zram_make_request);
 
-	 /* gendisk structure */
+	/* gendisk structure */
 	zram->disk = alloc_disk(1);
 	if (!zram->disk) {
 		pr_warn("Error allocating disk structure for device %d\n",
@@ -1207,90 +1270,177 @@ static int create_device(struct zram *zram, int device_id)
 	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
 	zram->meta = NULL;
 	zram->max_comp_streams = 1;
-	return 0;
+
+	pr_info("Added device: %s\n", zram->disk->disk_name);
+	return device_id;
 
 out_free_disk:
 	del_gendisk(zram->disk);
 	put_disk(zram->disk);
 out_free_queue:
 	blk_cleanup_queue(queue);
-out:
+out_free_idr:
+	idr_remove(&zram_index_idr, device_id);
+out_free_dev:
+	kfree(zram);
 	return ret;
 }
 
-static void destroy_devices(unsigned int nr)
+static int zram_remove(struct zram *zram)
+{
+	struct block_device *bdev;
+
+	bdev = bdget_disk(zram->disk, 0);
+	if (!bdev)
+		return -ENOMEM;
+
+	mutex_lock(&bdev->bd_mutex);
+	if (bdev->bd_openers || zram->claim) {
+		mutex_unlock(&bdev->bd_mutex);
+		bdput(bdev);
+		return -EBUSY;
+	}
+
+	zram->claim = true;
+	mutex_unlock(&bdev->bd_mutex);
+
+	/*
+	 * Remove sysfs first, so no one will perform a disksize
+	 * store while we destroy the devices. This also helps during
+	 * hot_remove -- zram_reset_device() is the last holder of
+	 * ->init_lock, no later/concurrent disksize_store() or any
+	 * other sysfs handlers are possible.
+	 */
+	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
+			&zram_disk_attr_group);
+
+	/* Make sure all the pending I/O are finished */
+	fsync_bdev(bdev);
+	zram_reset_device(zram);
+	bdput(bdev);
+
+	pr_info("Removed device: %s\n", zram->disk->disk_name);
+
+	idr_remove(&zram_index_idr, zram->disk->first_minor);
+	blk_cleanup_queue(zram->disk->queue);
+	del_gendisk(zram->disk);
+	put_disk(zram->disk);
+	kfree(zram);
+	return 0;
+}
+
+/* zram-control sysfs attributes */
+static ssize_t hot_add_show(struct class *class,
+			struct class_attribute *attr,
+			char *buf)
+{
+	int ret;
+
+	mutex_lock(&zram_index_mutex);
+	ret = zram_add();
+	mutex_unlock(&zram_index_mutex);
+
+	if (ret < 0)
+		return ret;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+}
+
+static ssize_t hot_remove_store(struct class *class,
+			struct class_attribute *attr,
+			const char *buf,
+			size_t count)
 {
 	struct zram *zram;
-	unsigned int i;
+	int ret, dev_id;
 
-	for (i = 0; i < nr; i++) {
-		zram = &zram_devices[i];
-		/*
-		 * Remove sysfs first, so no one will perform a disksize
-		 * store while we destroy the devices
-		 */
-		sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
-				&zram_disk_attr_group);
+	/* dev_id is gendisk->first_minor, which is `int' */
+	ret = kstrtoint(buf, 10, &dev_id);
+	if (ret)
+		return ret;
+	if (dev_id < 0)
+		return -EINVAL;
 
-		zram_reset_device(zram);
+	mutex_lock(&zram_index_mutex);
 
-		blk_cleanup_queue(zram->disk->queue);
-		del_gendisk(zram->disk);
-		put_disk(zram->disk);
-	}
+	zram = idr_find(&zram_index_idr, dev_id);
+	if (zram)
+		ret = zram_remove(zram);
+	else
+		ret = -ENODEV;
+
+	mutex_unlock(&zram_index_mutex);
+	return ret ? ret : count;
+}
+
+static struct class_attribute zram_control_class_attrs[] = {
+	__ATTR_RO(hot_add),
+	__ATTR_WO(hot_remove),
+	__ATTR_NULL,
+};
+
+static struct class zram_control_class = {
+	.name		= "zram-control",
+	.owner		= THIS_MODULE,
+	.class_attrs	= zram_control_class_attrs,
+};
+
+static int zram_remove_cb(int id, void *ptr, void *data)
+{
+	zram_remove(ptr);
+	return 0;
+}
 
-	kfree(zram_devices);
+static void destroy_devices(void)
+{
+	class_unregister(&zram_control_class);
+	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+	idr_destroy(&zram_index_idr);
 	unregister_blkdev(zram_major, "zram");
-	pr_info("Destroyed %u device(s)\n", nr);
 }
 
 static int __init zram_init(void)
 {
-	int ret, dev_id;
+	int ret;
 
-	if (num_devices > max_num_devices) {
-		pr_warn("Invalid value for num_devices: %u\n",
-				num_devices);
-		return -EINVAL;
+	ret = class_register(&zram_control_class);
+	if (ret) {
+		pr_warn("Unable to register zram-control class\n");
+		return ret;
 	}
 
 	zram_major = register_blkdev(0, "zram");
 	if (zram_major <= 0) {
 		pr_warn("Unable to get major number\n");
+		class_unregister(&zram_control_class);
 		return -EBUSY;
 	}
 
-	/* Allocate the device array and initialize each one */
-	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
-	if (!zram_devices) {
-		unregister_blkdev(zram_major, "zram");
-		return -ENOMEM;
-	}
-
-	for (dev_id = 0; dev_id < num_devices; dev_id++) {
-		ret = create_device(&zram_devices[dev_id], dev_id);
-		if (ret)
+	while (num_devices != 0) {
+		mutex_lock(&zram_index_mutex);
+		ret = zram_add();
+		mutex_unlock(&zram_index_mutex);
+		if (ret < 0)
 			goto out_error;
+		num_devices--;
 	}
 
-	pr_info("Created %u device(s)\n", num_devices);
 	return 0;
 
 out_error:
-	destroy_devices(dev_id);
+	destroy_devices();
 	return ret;
 }
 
 static void __exit zram_exit(void)
 {
-	destroy_devices(num_devices);
+	destroy_devices();
 }
 
 module_init(zram_init);
 module_exit(zram_exit);
 
 module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
+MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 570c598f4ce9..6dbe2df506bf 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -20,12 +20,6 @@
 
 #include "zcomp.h"
 
-/*
- * Some arbitrary value. This is just to catch
- * invalid value for num_devices module parameter.
- */
-static const unsigned max_num_devices = 32;
-
 /*-- Configurable parameters */
 
 /*
@@ -121,5 +115,9 @@ struct zram {
 	 */
 	u64 disksize;	/* bytes */
 	char compressor[10];
+	/*
+	 * zram is claimed so open request will be failed
+	 */
+	bool claim; /* Protected by bdev->bd_mutex */
 };
 #endif