summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- drivers/nvme/host/core.c 922
1 files changed, 667 insertions, 255 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 903d5813023a..5a14cc7f28ee 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
-#include <scsi/sg.h>
#include <asm/unaligned.h>
#include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
@@ -65,34 +64,67 @@ static bool force_apst;
module_param(force_apst, bool, 0644);
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
-static int nvme_error_status(struct request *req)
+/*
+ * Pack a log identifier and a transfer length into one little-endian dword:
+ * 'lid' occupies the low bits, and the number of dwords in 'size' bytes,
+ * minus one, is placed from bit 16 upwards.
+ */
+static __le32 nvme_get_log_dw10(u8 lid, size_t size)
+{
+	size_t numd = size / 4 - 1;
+
+	return cpu_to_le32((numd << 16) | lid);
+}
+
+/*
+ * Kick off an asynchronous controller reset.
+ *
+ * The controller is first moved to NVME_CTRL_RESETTING and its reset_work is
+ * then queued on nvme_wq. Returns 0 when both steps succeed and -EBUSY when
+ * either the state change is refused or the work item could not be queued.
+ */
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) &&
+	    queue_work(nvme_wq, &ctrl->reset_work))
+		return 0;
+
+	return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+/*
+ * Start a controller reset through nvme_reset_ctrl() and, if it was started,
+ * wait for reset_work to finish via flush_work(). The return value is the one
+ * from nvme_reset_ctrl().
+ */
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+	int err = nvme_reset_ctrl(ctrl);
+
+	if (err)
+		return err;
+
+	flush_work(&ctrl->reset_work);
+	return 0;
+}
+
+/*
+ * Translate the NVMe status stored in the request (masked with 0x7ff) into a
+ * block layer blk_status_t. Status codes without an explicit case are
+ * reported as BLK_STS_IOERR.
+ */
+static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
case NVME_SC_SUCCESS:
- return 0;
+ return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
-
- /*
- * XXX: these errors are a nasty side-band protocol to
- * drivers/md/dm-mpath.c:noretry_error() that aren't documented
- * anywhere..
- */
- case NVME_SC_CMD_SEQ_ERROR:
- return -EILSEQ;
+ return BLK_STS_NOSPC;
case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
+ return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
- return -ENODATA;
+ case NVME_SC_ACCESS_DENIED:
+ case NVME_SC_READ_ONLY:
+ return BLK_STS_MEDIUM;
+ case NVME_SC_GUARD_CHECK:
+ case NVME_SC_APPTAG_CHECK:
+ case NVME_SC_REFTAG_CHECK:
+ case NVME_SC_INVALID_PI:
+ return BLK_STS_PROTECTION;
+ case NVME_SC_RESERVATION_CONFLICT:
+ return BLK_STS_NEXUS;
+ default:
+ return BLK_STS_IOERR;
}
}
@@ -102,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
return false;
if (nvme_req(req)->status & NVME_SC_DNR)
return false;
- if (jiffies - req->start_time >= req->timeout)
- return false;
if (nvme_req(req)->retries >= nvme_max_retries)
return false;
return true;
@@ -113,7 +143,7 @@ void nvme_complete_rq(struct request *req)
{
if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
nvme_req(req)->retries++;
- blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+ blk_mq_requeue_request(req, true);
return;
}
@@ -144,9 +174,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state)
{
enum nvme_ctrl_state old_state;
+ unsigned long flags;
bool changed = false;
- spin_lock_irq(&ctrl->lock);
+ spin_lock_irqsave(&ctrl->lock, flags);
old_state = ctrl->state;
switch (new_state) {
@@ -165,7 +196,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_RECONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -208,7 +238,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
if (changed)
ctrl->state = new_state;
- spin_unlock_irq(&ctrl->lock);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
return changed;
}
@@ -283,6 +313,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
+/*
+ * Issue a Directive Send admin command, addressed to NVME_NSID_ALL, that
+ * enables or disables the Streams directive depending on 'enable'.
+ * Returns the result of nvme_submit_sync_cmd().
+ */
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+	struct nvme_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.directive.opcode = nvme_admin_directive_send;
+	cmd.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
+	cmd.directive.dtype = NVME_DIR_IDENTIFY;
+	cmd.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+	cmd.directive.tdtype = NVME_DIR_STREAMS;
+	/* endir is already zero from the memset; only set it when enabling */
+	if (enable)
+		cmd.directive.endir = NVME_DIR_ENDIR;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, 0);
+}
+
+/* Turn the Streams directive off; thin wrapper around nvme_toggle_streams(). */
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+	const bool enable = false;
+
+	return nvme_toggle_streams(ctrl, enable);
+}
+
+/* Turn the Streams directive on; thin wrapper around nvme_toggle_streams(). */
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+	const bool enable = true;
+
+	return nvme_toggle_streams(ctrl, enable);
+}
+
+/*
+ * Read the Streams directive parameters for 'nsid' into '*s' using a
+ * Directive Receive admin command. '*s' is zeroed before the command is
+ * submitted. Returns the result of nvme_submit_sync_cmd().
+ */
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+		struct streams_directive_params *s, u32 nsid)
+{
+	struct nvme_command cmd;
+
+	memset(s, 0, sizeof(*s));
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.directive.opcode = nvme_admin_directive_recv;
+	cmd.directive.nsid = cpu_to_le32(nsid);
+	/* transfer length is expressed in dwords, minus one */
+	cmd.directive.numd = cpu_to_le32(sizeof(*s) / 4 - 1);
+	cmd.directive.dtype = NVME_DIR_STREAMS;
+	cmd.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &cmd, s, sizeof(*s));
+}
+
+/*
+ * Enable and size the Streams directive for this controller.
+ *
+ * Nothing is done (and 0 is returned) when the controller does not advertise
+ * NVME_CTRL_OACS_DIRECTIVES or the 'streams' module parameter is off.
+ * Otherwise streams are enabled and the stream parameters are read. If fewer
+ * than BLK_MAX_WRITE_HINTS - 1 streams are available, streams are disabled
+ * again and 0 is returned.
+ *
+ * Streams are also disabled again when reading the parameters fails, so the
+ * controller is never left with the directive enabled while ctrl->nr_streams
+ * is unset. In that case the error from nvme_get_stream_params() is returned.
+ */
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+		return 0;
+	if (!streams)
+		return 0;
+
+	ret = nvme_enable_streams(ctrl);
+	if (ret)
+		return ret;
+
+	ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
+	if (ret)
+		goto out_disable_stream;
+
+	ctrl->nssa = le16_to_cpu(s.nssa);
+	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+		dev_info(ctrl->device, "too few streams (%u) available\n",
+					ctrl->nssa);
+		/* ret is 0 here: too few streams is not treated as an error */
+		goto out_disable_stream;
+	}
+
+	ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+	return 0;
+
+out_disable_stream:
+	nvme_disable_streams(ctrl);
+	return ret;
+}
+
+/*
+ * Map the write hint of 'req' onto a stream.
+ *
+ * A hint of WRITE_LIFE_NOT_SET or WRITE_LIFE_NONE selects index 0 and leaves
+ * 'control' and 'dsmgmt' untouched. Any other hint is decremented by one,
+ * NVME_RW_DTYPE_STREAMS is set in 'control' and the index is stored in
+ * 'dsmgmt' shifted left by 16. An index above ctrl->nr_streams triggers a
+ * one-time warning and the function returns without touching anything.
+ *
+ * Finally the request size (bytes >> 9) is added to the queue's write_hints
+ * counter for that index, if the index fits the array.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+				     struct request *req, u16 *control,
+				     u32 *dsmgmt)
+{
+	enum rw_hint streamid = req->write_hint;
+
+	switch (streamid) {
+	case WRITE_LIFE_NOT_SET:
+	case WRITE_LIFE_NONE:
+		streamid = 0;
+		break;
+	default:
+		streamid--;
+		if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+			return;
+
+		*control |= NVME_RW_DTYPE_STREAMS;
+		*dsmgmt |= streamid << 16;
+		break;
+	}
+
+	if (streamid < ARRAY_SIZE(req->q->write_hints))
+		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -291,7 +420,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
cmnd->common.nsid = cpu_to_le32(ns->ns_id);
}
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +429,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
if (!range)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
__rq_for_each_bio(bio, req) {
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +443,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
if (WARN_ON_ONCE(n != segments)) {
kfree(range);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +457,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_len = sizeof(*range) * segments;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
u32 dsmgmt = 0;
+ /*
+ * If formated with metadata, require the block layer provide a buffer
+ * unless this namespace is formated such that the metadata can be
+ * stripped/generated by the controller with PRACT=1.
+ */
+ if (ns && ns->ms &&
+ (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+ !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+ return BLK_STS_NOTSUPP;
+
if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;
if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +491,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+ nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +513,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ return 0;
}
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
- int ret = BLK_MQ_RQ_QUEUE_OK;
+ blk_status_t ret = BLK_STS_OK;
if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_req(req)->retries = 0;
@@ -398,11 +542,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
break;
case REQ_OP_READ:
case REQ_OP_WRITE:
- nvme_setup_rw(ns, req, cmd);
+ ret = nvme_setup_rw(ns, req, cmd);
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
cmd->common.command_id = req->tag;
@@ -454,10 +598,44 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
-int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen,
- void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
- u32 *result, unsigned timeout)
+/*
+ * Allocate a kernel bounce buffer of 'len' bytes for user metadata and attach
+ * it to 'bio' as an integrity payload seeded with 'seed'.
+ *
+ * For writes the buffer is first filled from 'ubuf'. On success the buffer is
+ * returned. On failure the buffer is freed and an ERR_PTR() is returned:
+ * -ENOMEM for allocation or attach failures, -EFAULT when the user copy
+ * fails, or the error reported by bio_integrity_alloc().
+ */
+static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
+		unsigned len, u32 seed, bool write)
+{
+	struct bio_integrity_payload *bip;
+	void *buf;
+	int ret;
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	if (write && copy_from_user(buf, ubuf, len)) {
+		ret = -EFAULT;
+		goto out_free_meta;
+	}
+
+	bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+	if (IS_ERR(bip)) {
+		ret = PTR_ERR(bip);
+		goto out_free_meta;
+	}
+
+	bip->bip_iter.bi_size = len;
+	bip->bip_iter.bi_sector = seed;
+
+	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
+			offset_in_page(buf));
+	if (ret != len) {
+		/* the whole buffer could not be attached */
+		ret = -ENOMEM;
+		goto out_free_meta;
+	}
+
+	return buf;
+
+out_free_meta:
+	kfree(buf);
+	return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+ struct nvme_command *cmd, void __user *ubuffer,
+ unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+ u32 meta_seed, u32 *result, unsigned timeout)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -479,50 +657,17 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
if (ret)
goto out;
bio = req->bio;
-
- if (!disk)
- goto submit;
- bio->bi_bdev = bdget_disk(disk, 0);
- if (!bio->bi_bdev) {
- ret = -ENODEV;
- goto out_unmap;
- }
-
- if (meta_buffer && meta_len) {
- struct bio_integrity_payload *bip;
-
- meta = kmalloc(meta_len, GFP_KERNEL);
- if (!meta) {
- ret = -ENOMEM;
+ bio->bi_disk = disk;
+ if (disk && meta_buffer && meta_len) {
+ meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
+ meta_seed, write);
+ if (IS_ERR(meta)) {
+ ret = PTR_ERR(meta);
goto out_unmap;
}
-
- if (write) {
- if (copy_from_user(meta, meta_buffer,
- meta_len)) {
- ret = -EFAULT;
- goto out_free_meta;
- }
- }
-
- bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
- if (IS_ERR(bip)) {
- ret = PTR_ERR(bip);
- goto out_free_meta;
- }
-
- bip->bip_iter.bi_size = meta_len;
- bip->bip_iter.bi_sector = meta_seed;
-
- ret = bio_integrity_add_page(bio, virt_to_page(meta),
- meta_len, offset_in_page(meta));
- if (ret != meta_len) {
- ret = -ENOMEM;
- goto out_free_meta;
- }
}
}
- submit:
+
blk_execute_rq(req->q, disk, req, 0);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
@@ -534,36 +679,25 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
}
- out_free_meta:
kfree(meta);
out_unmap:
- if (bio) {
- if (disk && bio->bi_bdev)
- bdput(bio->bi_bdev);
+ if (bio)
blk_rq_unmap_user(bio);
- }
out:
blk_mq_free_request(req);
return ret;
}
-int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen, u32 *result,
- unsigned timeout)
-{
- return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
- result, timeout);
-}
-
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
{
struct nvme_ctrl *ctrl = rq->end_io_data;
blk_mq_free_request(rq);
- if (error) {
+ if (status) {
dev_err(ctrl->device,
- "failed nvme_keep_alive_end_io error=%d\n", error);
+ "failed nvme_keep_alive_end_io error=%d\n",
+ status);
return;
}
@@ -599,7 +733,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
if (nvme_keep_alive(ctrl)) {
/* allocation failure, reset the controller */
dev_err(ctrl->device, "keep-alive failed\n");
- ctrl->ops->reset_ctrl(ctrl);
+ nvme_reset_ctrl(ctrl);
return;
}
}
@@ -623,7 +757,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
@@ -643,6 +777,78 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
+/*
+ * Fetch the Namespace Identification Descriptor list for 'nsid' and copy any
+ * EUI-64, NGUID and UUID descriptors found into 'eui64', 'nguid' and 'uuid'.
+ *
+ * The list is walked until a zero-length descriptor is found or the end of
+ * the identify buffer is reached. Descriptors of unknown type are skipped.
+ * A known descriptor with an unexpected length stops the walk with a warning;
+ * the return value is then still the status of the identify command itself.
+ *
+ * Descriptor lengths come from the device, so each descriptor header and its
+ * payload are checked to lie fully inside the buffer before they are read.
+ *
+ * Returns -ENOMEM if the buffer cannot be allocated, otherwise the result of
+ * nvme_submit_sync_cmd().
+ */
+static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
+		u8 *eui64, u8 *nguid, uuid_t *uuid)
+{
+	struct nvme_command c = { };
+	int status;
+	void *data;
+	int pos;
+	int len;
+
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
+				      NVME_IDENTIFY_DATA_SIZE);
+	if (status)
+		goto free_data;
+
+	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+		struct nvme_ns_id_desc *cur = data + pos;
+
+		/* the descriptor header must lie inside the buffer */
+		if (pos + sizeof(*cur) > NVME_IDENTIFY_DATA_SIZE)
+			break;
+
+		if (cur->nidl == 0)
+			break;
+
+		/* ... and so must the identifier that follows it */
+		if (pos + sizeof(*cur) + cur->nidl > NVME_IDENTIFY_DATA_SIZE)
+			break;
+
+		switch (cur->nidt) {
+		case NVME_NIDT_EUI64:
+			if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+				dev_warn(ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_EUI64_LEN;
+			memcpy(eui64, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_NGUID:
+			if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+				dev_warn(ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_NGUID_LEN;
+			memcpy(nguid, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_UUID:
+			if (cur->nidl != NVME_NIDT_UUID_LEN) {
+				dev_warn(ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_UUID_LEN;
+			uuid_copy(uuid, data + pos + sizeof(*cur));
+			break;
+		default:
+			/* Skip unknown types */
+			len = cur->nidl;
+			break;
+		}
+
+		/* advance past the header as well as the identifier */
+		len += sizeof(*cur);
+	}
+free_data:
+	kfree(data);
+	return status;
+}
+
static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
struct nvme_command c = { };
@@ -653,9 +859,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
- struct nvme_id_ns **id)
+static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
+ unsigned nsid)
{
+ struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
@@ -664,37 +871,21 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
- *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ns));
- if (error)
- kfree(*id);
- return error;
-}
+ id = kmalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return NULL;
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result)
-{
- struct nvme_command c;
- union nvme_result res;
- int ret;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.fid = cpu_to_le32(fid);
+ error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ if (error) {
+ dev_warn(ctrl->device, "Identify namespace failed\n");
+ kfree(id);
+ return NULL;
+ }
- ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
- NVME_QID_ANY, 0, 0);
- if (ret >= 0 && result)
- *result = le32_to_cpu(res.u32);
- return ret;
+ return id;
}
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
void *buffer, size_t buflen, u32 *result)
{
struct nvme_command c;
@@ -713,28 +904,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
return ret;
}
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
-}
-
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +921,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
* access to the admin queue, as that might be only way to fix them up.
*/
if (status > 0) {
- dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
*count = 0;
} else {
nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -808,7 +977,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- return __nvme_submit_user_cmd(ns->queue, &c,
+ return nvme_submit_user_cmd(ns->queue, &c,
(void __user *)(uintptr_t)io.addr, length,
metadata, meta_len, io.slba, NULL, 0);
}
@@ -846,7 +1015,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- &cmd.result, timeout);
+ (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
+ 0, &cmd.result, timeout);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
return -EFAULT;
@@ -870,12 +1040,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
-#endif
default:
#ifdef CONFIG_NVM
if (ns->ndev)
@@ -892,10 +1056,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
@@ -983,6 +1143,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+/*
+ * Set the queue's chunk size from ns->noiob: the value is shifted left by
+ * (lba_shift - 9) and rounded down to a power of two before being handed to
+ * blk_queue_chunk_sectors().
+ */
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+	u32 sectors = (u32)ns->noiob << (ns->lba_shift - 9);
+
+	blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(sectors));
+}
+
static void nvme_config_discard(struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1157,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
+ if (ctrl->nr_streams && ns->sws && ns->sgs) {
+ unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+ ns->queue->limits.discard_alignment = sz;
+ ns->queue->limits.discard_granularity = sz;
+ } else {
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ }
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1001,29 +1174,27 @@ static void nvme_config_discard(struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
}
-static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
+static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
+ struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
{
- if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) {
- dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__);
- return -ENODEV;
- }
-
- if ((*id)->ncap == 0) {
- kfree(*id);
- return -ENODEV;
+ if (ctrl->vs >= NVME_VS(1, 1, 0))
+ memcpy(eui64, id->eui64, sizeof(id->eui64));
+ if (ctrl->vs >= NVME_VS(1, 2, 0))
+ memcpy(nguid, id->nguid, sizeof(id->nguid));
+ if (ctrl->vs >= NVME_VS(1, 3, 0)) {
+ /* Don't treat error as fatal we potentially
+ * already have a NGUID or EUI-64
+ */
+ if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
+ dev_warn(ctrl->device,
+ "%s: Identify Descriptors failed\n", __func__);
}
-
- if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
- memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
- if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
- memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
-
- return 0;
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 bs;
/*
@@ -1034,12 +1205,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
bs = 1 << ns->lba_shift;
+ ns->noiob = le16_to_cpu(id->noiob);
blk_mq_freeze_queue(disk->queue);
- if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs);
+ if (ns->noiob)
+ nvme_set_chunk_size(ns);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1221,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
@@ -1055,22 +1229,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+/*
+ * Re-identify the namespace behind 'disk' and check that its identifiers
+ * (UUID, NGUID, EUI-64) still match the ones cached in the nvme_ns.
+ *
+ * Returns -ENODEV when the namespace is flagged NVME_NS_DEAD (capacity is set
+ * to 0 first), when Identify Namespace fails, when the reported ncap is 0, or
+ * when any identifier changed; returns 0 otherwise.
+ *
+ * NOTE(review): the removed lines called __nvme_revalidate_disk(disk, id);
+ * no replacement call is visible in this hunk - confirm the disk limits and
+ * capacity are still refreshed elsewhere.
+ */
static int nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
- struct nvme_id_ns *id = NULL;
- int ret;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_id_ns *id;
+ u8 eui64[8] = { 0 }, nguid[16] = { 0 };
+ uuid_t uuid = uuid_null;
+ int ret = 0;
if (test_bit(NVME_NS_DEAD, &ns->flags)) {
set_capacity(disk, 0);
return -ENODEV;
}
- ret = nvme_revalidate_ns(ns, &id);
- if (ret)
- return ret;
+ id = nvme_identify_ns(ctrl, ns->ns_id);
+ if (!id)
+ return -ENODEV;
- __nvme_revalidate_disk(disk, id);
- kfree(id);
+ if (id->ncap == 0) {
+ ret = -ENODEV;
+ goto out;
+ }
- return 0;
+ nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
+ if (!uuid_equal(&ns->uuid, &uuid) ||
+ memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
+ memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
+ dev_err(ctrl->device,
+ "identifiers changed for nsid %d\n", ns->ns_id);
+ ret = -ENODEV;
+ }
+
+out:
+ kfree(id);
+ return ret;
}
static char nvme_pr_type(enum pr_type type)
@@ -1270,7 +1460,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ctrl->ctrl_config = NVME_CC_CSS_NVM;
ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+ ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
@@ -1283,7 +1473,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
- unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+ unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
u32 csts;
int ret;
@@ -1332,7 +1522,24 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc);
}
-static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+/*
+ * Program the controller's timestamp feature with the current wall-clock
+ * time in milliseconds, if the controller advertises
+ * NVME_CTRL_ONCS_TIMESTAMP. Returns 0 when the feature is not advertised or
+ * was set, otherwise the result of nvme_set_features() after a one-time
+ * warning.
+ */
+static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
+{
+	__le64 ts;
+	int ret;
+
+	if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
+		return 0;
+
+	ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
+	ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts),
+			NULL);
+	if (!ret)
+		return 0;
+
+	dev_warn_once(ctrl->device,
+		"could not set timestamp (%d)\n", ret);
+	return ret;
+}
+
+static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
* APST (Autonomous Power State Transition) lets us program a
@@ -1361,18 +1568,18 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
* then don't do anything.
*/
if (!ctrl->apsta)
- return;
+ return 0;
if (ctrl->npss > 31) {
dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
- return;
+ return 0;
}
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
- return;
+ return 0;
- if (ctrl->ps_max_latency_us == 0) {
+ if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
dev_dbg(ctrl->device, "APST disabled\n");
@@ -1452,6 +1659,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
kfree(table);
+ return ret;
}
static void nvme_set_latency_tolerance(struct device *dev, s32 val)
@@ -1528,6 +1736,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
string_matches(id->fr, q->fr, sizeof(id->fr));
}
+/*
+ * Fill ctrl->subnqn with the subsystem NQN.
+ *
+ * If the identify data carries a non-empty SUBNQN that is NUL-terminated
+ * within NVMF_NQN_SIZE bytes, it is copied verbatim. Otherwise a warning is
+ * printed for controllers reporting version 1.2.1 or later, and a substitute
+ * NQN is generated from the PCI vendor id, subsystem vendor id, serial number
+ * and model number. The remainder of ctrl->subnqn is zero-filled.
+ *
+ * The two ids are printed as zero-padded 4-digit hex ("%04x"). A plain "%4x"
+ * pads with spaces, which would put blanks into the generated NQN for ids
+ * below 0x1000.
+ */
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t nqnlen;
+	int off;
+
+	nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+	if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+		/* strnlen() above proved a terminator within NVMF_NQN_SIZE */
+		strcpy(ctrl->subnqn, id->subnqn);
+		return;
+	}
+
+	if (ctrl->vs >= NVME_VS(1, 2, 1))
+		dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+	/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+	off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+			"nqn.2014.08.org.nvmexpress:%04x%04x",
+			le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+	memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+	off += sizeof(id->sn);
+	memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+	off += sizeof(id->mn);
+	memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@@ -1539,7 +1772,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
- u8 prev_apsta;
+ bool prev_apst_enabled;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
@@ -1563,6 +1796,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO;
}
+ nvme_init_subnqn(ctrl, id);
+
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
@@ -1582,7 +1817,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
- dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+ dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
}
@@ -1606,17 +1841,32 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
+ if (id->rtd3e) {
+ /* us -> s */
+ u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
+
+ ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
+ shutdown_timeout, 60);
+
+ if (ctrl->shutdown_timeout != shutdown_timeout)
+ dev_warn(ctrl->device,
+ "Shutdown timeout set to %u seconds\n",
+ ctrl->shutdown_timeout);
+ } else
+ ctrl->shutdown_timeout = shutdown_timeout;
+
ctrl->npss = id->npss;
- prev_apsta = ctrl->apsta;
+ ctrl->apsta = id->apsta;
+ prev_apst_enabled = ctrl->apst_enabled;
if (ctrl->quirks & NVME_QUIRK_NO_APST) {
if (force_apst && id->apsta) {
- dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
- ctrl->apsta = 1;
+ dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+ ctrl->apst_enabled = true;
} else {
- ctrl->apsta = 0;
+ ctrl->apst_enabled = false;
}
} else {
- ctrl->apsta = id->apsta;
+ ctrl->apst_enabled = id->apsta;
}
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
@@ -1630,29 +1880,50 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
* In fabrics we need to verify the cntlid matches the
* admin connect
*/
- if (ctrl->cntlid != le16_to_cpu(id->cntlid))
+ if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
ret = -EINVAL;
+ goto out_free;
+ }
if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"keep-alive support is mandatory for fabrics\n");
ret = -EINVAL;
+ goto out_free;
}
} else {
ctrl->cntlid = le16_to_cpu(id->cntlid);
+ ctrl->hmpre = le32_to_cpu(id->hmpre);
+ ctrl->hmmin = le32_to_cpu(id->hmmin);
+ ctrl->hmminds = le32_to_cpu(id->hmminds);
+ ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
kfree(id);
- if (ctrl->apsta && !prev_apsta)
+ if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
- else if (!ctrl->apsta && prev_apsta)
+ else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
- nvme_configure_apst(ctrl);
+ ret = nvme_configure_apst(ctrl);
+ if (ret < 0)
+ return ret;
+
+ ret = nvme_configure_timestamp(ctrl);
+ if (ret < 0)
+ return ret;
+
+ ret = nvme_configure_directives(ctrl);
+ if (ret < 0)
+ return ret;
ctrl->identified = true;
+ return 0;
+
+out_free:
+ kfree(id);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_identify);
@@ -1735,7 +2006,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->device, "resetting controller\n");
- return ctrl->ops->reset_ctrl(ctrl);
+ return nvme_reset_ctrl_sync(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
case NVME_IOCTL_RESCAN:
@@ -1761,7 +2032,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
int ret;
- ret = ctrl->ops->reset_ctrl(ctrl);
+ ret = nvme_reset_ctrl_sync(ctrl);
if (ret < 0)
return ret;
return count;
@@ -1787,15 +2058,20 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
int serial_len = sizeof(ctrl->serial);
int model_len = sizeof(ctrl->model);
- if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
- return sprintf(buf, "eui.%16phN\n", ns->uuid);
+ if (!uuid_is_null(&ns->uuid))
+ return sprintf(buf, "uuid.%pU\n", &ns->uuid);
+
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return sprintf(buf, "eui.%16phN\n", ns->nguid);
if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
return sprintf(buf, "eui.%8phN\n", ns->eui);
- while (ctrl->serial[serial_len - 1] == ' ')
+ while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' ||
+ ctrl->serial[serial_len - 1] == '\0'))
serial_len--;
- while (ctrl->model[model_len - 1] == ' ')
+ while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' ||
+ ctrl->model[model_len - 1] == '\0'))
model_len--;
return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
@@ -1803,11 +2079,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
+/*
+ * sysfs show handler for the "nguid" attribute: formats the namespace's
+ * nguid field with %pU followed by a newline and returns the sprintf()
+ * result.
+ */
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns *target = nvme_get_ns_from_dev(dev);
+
+	return sprintf(buf, "%pU\n", target->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- return sprintf(buf, "%pU\n", ns->uuid);
+
+ /* For backward compatibility expose the NGUID to userspace if
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ns->uuid)) {
+ printk_ratelimited(KERN_WARNING
+ "No UUID available providing old NGUID\n");
+ return sprintf(buf, "%pU\n", ns->nguid);
+ }
+ return sprintf(buf, "%pU\n", &ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
@@ -1830,6 +2123,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
+ &dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
@@ -1842,7 +2136,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
if (a == &dev_attr_uuid.attr) {
- if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+ if (uuid_is_null(&ns->uuid) &&
+ !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_nguid.attr) {
+ if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
return 0;
}
if (a == &dev_attr_eui.attr) {
@@ -1931,8 +2230,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ctrl->ops->get_subsysnqn(ctrl));
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
@@ -1961,24 +2259,16 @@ static struct attribute *nvme_dev_attrs[] = {
NULL
};
-#define CHECK_ATTR(ctrl, a, name) \
- if ((a) == &dev_attr_##name.attr && \
- !(ctrl)->ops->get_##name) \
- return 0
-
static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- if (a == &dev_attr_delete_controller.attr) {
- if (!ctrl->ops->delete_ctrl)
- return 0;
- }
-
- CHECK_ATTR(ctrl, a, subsysnqn);
- CHECK_ATTR(ctrl, a, address);
+ if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+ return 0;
+ if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+ return 0;
return a->mode;
}
@@ -2019,6 +2309,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
+/*
+ * Read the streams directive parameters for one namespace, store sws/sgs
+ * in the namespace, and derive the request queue's io_min/io_opt from them.
+ *
+ * Returns 0 when the controller has no streams (ctrl->nr_streams == 0) or
+ * on success, otherwise the value returned by nvme_get_stream_params().
+ */
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	struct streams_directive_params params;
+	unsigned int block_size;
+	int err;
+
+	if (!ctrl->nr_streams)
+		return 0;
+
+	err = nvme_get_stream_params(ctrl, &params, ns->ns_id);
+	if (err)
+		return err;
+
+	ns->sws = le32_to_cpu(params.sws);
+	ns->sgs = le16_to_cpu(params.sgs);
+
+	/* With sws == 0 neither queue limit is touched. */
+	if (!ns->sws)
+		return 0;
+
+	block_size = 1 << ns->lba_shift;
+	blk_queue_io_min(ns->queue, block_size * ns->sws);
+
+	/* io_opt is only set when sgs is nonzero as well. */
+	if (ns->sgs)
+		blk_queue_io_opt(ns->queue, block_size * ns->sws * ns->sgs);
+
+	return 0;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -2048,16 +2364,24 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
+ nvme_setup_streams_ns(ctrl, ns);
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
- if (nvme_revalidate_ns(ns, &id))
+ id = nvme_identify_ns(ctrl, nsid);
+ if (!id)
goto out_free_queue;
- if (nvme_nvm_ns_supported(ns, id) &&
- nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+ if (id->ncap == 0)
goto out_free_id;
+
+ nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
+
+ if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
+ if (nvme_nvm_register(ns, disk_name, node)) {
+ dev_warn(ctrl->device, "LightNVM init failure\n");
+ goto out_free_id;
+ }
}
disk = alloc_disk_node(0, node);
@@ -2231,7 +2555,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
* removal.
*/
if (ctrl->state == NVME_CTRL_LIVE)
- schedule_work(&ctrl->scan_work);
+ queue_work(nvme_wq, &ctrl->scan_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_scan);
@@ -2264,7 +2588,7 @@ static void nvme_async_event_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, async_event_work);
spin_lock_irq(&ctrl->lock);
- while (ctrl->event_limit > 0) {
+ while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
int aer_idx = --ctrl->event_limit;
spin_unlock_irq(&ctrl->lock);
@@ -2274,6 +2598,71 @@ static void nvme_async_event_work(struct work_struct *work)
spin_unlock_irq(&ctrl->lock);
}
+/*
+ * Returns true when ctrl->ctrl_config has NVME_CC_ENABLE set and the CSTS
+ * register currently has NVME_CSTS_PP set.  Returns false when the CSTS
+ * read fails or reads back as all ones.
+ */
+static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
+{
+	u32 status;
+	bool enabled, paused;
+
+	/* The register value is only inspected after a successful read. */
+	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &status) ||
+	    status == ~0)
+		return false;
+
+	enabled = ctrl->ctrl_config & NVME_CC_ENABLE;
+	paused = status & NVME_CSTS_PP;
+
+	return enabled && paused;
+}
+
+/*
+ * Issue a Get Log Page admin command for NVME_LOG_FW_SLOT into a temporary
+ * buffer and discard the result; a warning is logged if the command fails.
+ * Silently does nothing if the buffer cannot be allocated.
+ */
+static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
+{
+	struct nvme_command c = { };
+	struct nvme_fw_slot_info_log *log;
+
+	log = kmalloc(sizeof(*log), GFP_KERNEL);
+	if (!log)
+		return;
+
+	c.common.opcode = nvme_admin_get_log_page;
+	c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
+	c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
+
+	/*
+	 * nvme_submit_sync_cmd() returns 0 on success and nonzero on failure,
+	 * so warn on a nonzero result.  The previous check was inverted and
+	 * warned only when the command succeeded.
+	 */
+	if (nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
+		dev_warn(ctrl->device,
+			"Get FW SLOT INFO log error\n");
+	kfree(log);
+}
+
+/*
+ * Work handler for ctrl->fw_act_work (queued when the
+ * NVME_AER_NOTICE_FW_ACT_STARTING async event is reported).
+ *
+ * Calls nvme_stop_queues(), polls nvme_ctrl_pp_status() every 100 ms until
+ * it returns false or the deadline passes (in which case a controller reset
+ * is requested), then, if the controller is still LIVE, calls
+ * nvme_start_queues() and reads the FW slot information log.
+ */
+static void nvme_fw_act_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(work,
+ struct nvme_ctrl, fw_act_work);
+ unsigned long fw_act_timeout;
+
+ /*
+ * Deadline in jiffies: ctrl->mtfa * 100 milliseconds when mtfa is
+ * nonzero, otherwise admin_timeout * 1000 milliseconds.
+ */
+ if (ctrl->mtfa)
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(ctrl->mtfa * 100);
+ else
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(admin_timeout * 1000);
+
+ nvme_stop_queues(ctrl);
+ while (nvme_ctrl_pp_status(ctrl)) {
+ if (time_after(jiffies, fw_act_timeout)) {
+ dev_warn(ctrl->device,
+ "Fw activation timeout, reset controller\n");
+ nvme_reset_ctrl(ctrl);
+ break;
+ }
+ msleep(100);
+ }
+
+ /*
+ * If the controller is no longer LIVE (for example because the reset
+ * requested above moved it to RESETTING), return without restarting
+ * the queues here.
+ */
+ if (ctrl->state != NVME_CTRL_LIVE)
+ return;
+
+ nvme_start_queues(ctrl);
+ /* read FW slot information to clear the AER */
+ nvme_get_fw_slot_info(ctrl);
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res)
{
@@ -2286,7 +2675,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- schedule_work(&ctrl->async_event_work);
+ if (ctrl->state == NVME_CTRL_LIVE)
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
@@ -2300,6 +2690,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
dev_info(ctrl->device, "rescanning\n");
nvme_queue_scan(ctrl);
break;
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ queue_work(nvme_wq, &ctrl->fw_act_work);
+ break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -2309,7 +2702,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_queue_async_events(struct nvme_ctrl *ctrl)
{
ctrl->event_limit = NVME_NR_AERS;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
@@ -2342,12 +2735,30 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
spin_unlock(&dev_list_lock);
}
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
+ nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
- nvme_remove_namespaces(ctrl);
+ cancel_work_sync(&ctrl->fw_act_work);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
+
+/*
+ * Start the controller's background activity: keep-alive when ctrl->kato is
+ * nonzero and, when ctrl->queue_count is greater than one, the namespace
+ * scan, the async event work and the namespace queues.
+ */
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->kato)
+		nvme_start_keep_alive(ctrl);
+
+	/* With a queue_count of one or less nothing else is started. */
+	if (ctrl->queue_count <= 1)
+		return;
+
+	nvme_queue_scan(ctrl);
+	nvme_queue_async_events(ctrl);
+	nvme_start_queues(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
spin_lock(&dev_list_lock);
@@ -2393,6 +2804,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
+ INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
ret = nvme_set_instance(ctrl);
if (ret)
@@ -2442,8 +2854,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
- /* Forcibly start all queues to avoid having stuck requests */
- blk_mq_start_hw_queues(ctrl->admin_q);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ if (ctrl->admin_q)
+ blk_mq_unquiesce_queue(ctrl->admin_q);
list_for_each_entry(ns, &ctrl->namespaces, list) {
/*
@@ -2455,15 +2868,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
- /*
- * Forcibly start all queues to avoid having stuck requests.
- * Note that we must ensure the queues are not stopped
- * when the final removal happens.
- */
- blk_mq_start_hw_queues(ns->queue);
-
- /* draining requests in requeue list */
- blk_mq_kick_requeue_list(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
}
@@ -2532,10 +2938,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->namespaces_mutex);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
- blk_mq_start_stopped_hw_queues(ns->queue, true);
- blk_mq_kick_requeue_list(ns->queue);
- }
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_unquiesce_queue(ns->queue);
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
@@ -2544,10 +2948,15 @@ int __init nvme_core_init(void)
{
int result;
+ nvme_wq = alloc_workqueue("nvme-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_wq)
+ return -ENOMEM;
+
result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
&nvme_dev_fops);
if (result < 0)
- return result;
+ goto destroy_wq;
else if (result > 0)
nvme_char_major = result;
@@ -2559,8 +2968,10 @@ int __init nvme_core_init(void)
return 0;
- unregister_chrdev:
+unregister_chrdev:
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+ destroy_workqueue(nvme_wq);
return result;
}
@@ -2568,6 +2979,7 @@ void nvme_core_exit(void)
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ destroy_workqueue(nvme_wq);
}
MODULE_LICENSE("GPL");