summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c350
-rw-r--r--drivers/nvme/host/fabrics.c23
-rw-r--r--drivers/nvme/host/fc.c149
-rw-r--r--drivers/nvme/host/lightnvm.c15
-rw-r--r--drivers/nvme/host/nvme.h17
-rw-r--r--drivers/nvme/host/pci.c31
-rw-r--r--drivers/nvme/host/rdma.c603
-rw-r--r--drivers/nvme/target/admin-cmd.c33
-rw-r--r--drivers/nvme/target/configfs.c2
-rw-r--r--drivers/nvme/target/core.c15
-rw-r--r--drivers/nvme/target/fabrics-cmd.c1
-rw-r--r--drivers/nvme/target/fc.c48
-rw-r--r--drivers/nvme/target/fcloop.c3
-rw-r--r--drivers/nvme/target/io-cmd.c6
-rw-r--r--drivers/nvme/target/loop.c1
-rw-r--r--drivers/nvme/target/nvmet.h1
-rw-r--r--drivers/nvme/target/rdma.c5
17 files changed, 741 insertions, 562 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37046ac2c441..277a7a02cba5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
+static __le32 nvme_get_log_dw10(u8 lid, size_t size)
+{
+ return cpu_to_le32((((size / 4) - 1) << 16) | lid);
+}
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -108,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req)
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
+ case NVME_SC_ACCESS_DENIED:
+ case NVME_SC_READ_ONLY:
return BLK_STS_MEDIUM;
+ case NVME_SC_GUARD_CHECK:
+ case NVME_SC_APPTAG_CHECK:
+ case NVME_SC_REFTAG_CHECK:
+ case NVME_SC_INVALID_PI:
+ return BLK_STS_PROTECTION;
+ case NVME_SC_RESERVATION_CONFLICT:
+ return BLK_STS_NEXUS;
default:
return BLK_STS_IOERR;
}
@@ -162,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state)
{
enum nvme_ctrl_state old_state;
+ unsigned long flags;
bool changed = false;
- spin_lock_irq(&ctrl->lock);
+ spin_lock_irqsave(&ctrl->lock, flags);
old_state = ctrl->state;
switch (new_state) {
@@ -225,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
if (changed)
ctrl->state = new_state;
- spin_unlock_irq(&ctrl->lock);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
return changed;
}
@@ -307,7 +322,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
memset(&c, 0, sizeof(c));
c.directive.opcode = nvme_admin_directive_send;
- c.directive.nsid = cpu_to_le32(0xffffffff);
+ c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
c.directive.dtype = NVME_DIR_IDENTIFY;
c.directive.tdtype = NVME_DIR_STREAMS;
@@ -357,7 +372,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
if (ret)
return ret;
- ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+ ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
return ret;
@@ -585,10 +600,44 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
-int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen,
- void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
- u32 *result, unsigned timeout)
+static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
+ unsigned len, u32 seed, bool write)
+{
+ struct bio_integrity_payload *bip;
+ int ret = -ENOMEM;
+ void *buf;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ ret = -EFAULT;
+ if (write && copy_from_user(buf, ubuf, len))
+ goto out_free_meta;
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+ if (IS_ERR(bip)) {
+ ret = PTR_ERR(bip);
+ goto out_free_meta;
+ }
+
+ bip->bip_iter.bi_size = len;
+ bip->bip_iter.bi_sector = seed;
+ ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf));
+ if (ret == len)
+ return buf;
+ ret = -ENOMEM;
+out_free_meta:
+ kfree(buf);
+out:
+ return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+ struct nvme_command *cmd, void __user *ubuffer,
+ unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+ u32 meta_seed, u32 *result, unsigned timeout)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -610,50 +659,17 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
if (ret)
goto out;
bio = req->bio;
-
- if (!disk)
- goto submit;
- bio->bi_bdev = bdget_disk(disk, 0);
- if (!bio->bi_bdev) {
- ret = -ENODEV;
- goto out_unmap;
- }
-
- if (meta_buffer && meta_len) {
- struct bio_integrity_payload *bip;
-
- meta = kmalloc(meta_len, GFP_KERNEL);
- if (!meta) {
- ret = -ENOMEM;
+ bio->bi_disk = disk;
+ if (disk && meta_buffer && meta_len) {
+ meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
+ meta_seed, write);
+ if (IS_ERR(meta)) {
+ ret = PTR_ERR(meta);
goto out_unmap;
}
-
- if (write) {
- if (copy_from_user(meta, meta_buffer,
- meta_len)) {
- ret = -EFAULT;
- goto out_free_meta;
- }
- }
-
- bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
- if (IS_ERR(bip)) {
- ret = PTR_ERR(bip);
- goto out_free_meta;
- }
-
- bip->bip_iter.bi_size = meta_len;
- bip->bip_iter.bi_sector = meta_seed;
-
- ret = bio_integrity_add_page(bio, virt_to_page(meta),
- meta_len, offset_in_page(meta));
- if (ret != meta_len) {
- ret = -ENOMEM;
- goto out_free_meta;
- }
}
}
- submit:
+
blk_execute_rq(req->q, disk, req, 0);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
@@ -665,27 +681,15 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
}
- out_free_meta:
kfree(meta);
out_unmap:
- if (bio) {
- if (disk && bio->bi_bdev)
- bdput(bio->bi_bdev);
+ if (bio)
blk_rq_unmap_user(bio);
- }
out:
blk_mq_free_request(req);
return ret;
}
-int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen, u32 *result,
- unsigned timeout)
-{
- return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
- result, timeout);
-}
-
static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
{
struct nvme_ctrl *ctrl = rq->end_io_data;
@@ -775,7 +779,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
-static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
+ u8 *eui64, u8 *nguid, uuid_t *uuid)
{
struct nvme_command c = { };
int status;
@@ -791,7 +796,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
if (!data)
return -ENOMEM;
- status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+ status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
NVME_IDENTIFY_DATA_SIZE);
if (status)
goto free_data;
@@ -805,33 +810,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
switch (cur->nidt) {
case NVME_NIDT_EUI64:
if (cur->nidl != NVME_NIDT_EUI64_LEN) {
- dev_warn(ns->ctrl->device,
+ dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_EUI64_LEN;
- memcpy(ns->eui, data + pos + sizeof(*cur), len);
+ memcpy(eui64, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_NGUID:
if (cur->nidl != NVME_NIDT_NGUID_LEN) {
- dev_warn(ns->ctrl->device,
+ dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_NGUID_LEN;
- memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+ memcpy(nguid, data + pos + sizeof(*cur), len);
break;
case NVME_NIDT_UUID:
if (cur->nidl != NVME_NIDT_UUID_LEN) {
- dev_warn(ns->ctrl->device,
+ dev_warn(ctrl->device,
"ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
cur->nidl);
goto free_data;
}
len = NVME_NIDT_UUID_LEN;
- uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+ uuid_copy(uuid, data + pos + sizeof(*cur));
break;
default:
/* Skip unnkown types */
@@ -856,9 +861,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
-static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
- struct nvme_id_ns **id)
+static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
+ unsigned nsid)
{
+ struct nvme_id_ns *id;
struct nvme_command c = { };
int error;
@@ -867,15 +873,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
c.identify.nsid = cpu_to_le32(nsid);
c.identify.cns = NVME_ID_CNS_NS;
- *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
+ id = kmalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return NULL;
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ns));
- if (error)
- kfree(*id);
- return error;
+ error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ if (error) {
+ dev_warn(ctrl->device, "Identify namespace failed\n");
+ kfree(id);
+ return NULL;
+ }
+
+ return id;
}
static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
@@ -970,7 +979,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- return __nvme_submit_user_cmd(ns->queue, &c,
+ return nvme_submit_user_cmd(ns->queue, &c,
(void __user *)(uintptr_t)io.addr, length,
metadata, meta_len, io.slba, NULL, 0);
}
@@ -1008,7 +1017,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- &cmd.result, timeout);
+ (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
+ 0, &cmd.result, timeout);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
return -EFAULT;
@@ -1166,32 +1176,21 @@ static void nvme_config_discard(struct nvme_ns *ns)
blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
}
-static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
+static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
+ struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
{
- if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) {
- dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__);
- return -ENODEV;
- }
-
- if ((*id)->ncap == 0) {
- kfree(*id);
- return -ENODEV;
- }
-
- if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
- memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
- if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
- memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
- if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+ if (ctrl->vs >= NVME_VS(1, 1, 0))
+ memcpy(eui64, id->eui64, sizeof(id->eui64));
+ if (ctrl->vs >= NVME_VS(1, 2, 0))
+ memcpy(nguid, id->nguid, sizeof(id->nguid));
+ if (ctrl->vs >= NVME_VS(1, 3, 0)) {
/* Don't treat error as fatal we potentially
* already have a NGUID or EUI-64
*/
- if (nvme_identify_ns_descs(ns, ns->ns_id))
- dev_warn(ns->ctrl->device,
+ if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
+ dev_warn(ctrl->device,
"%s: Identify Descriptors failed\n", __func__);
}
-
- return 0;
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
@@ -1232,22 +1231,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
- struct nvme_id_ns *id = NULL;
- int ret;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_id_ns *id;
+ u8 eui64[8] = { 0 }, nguid[16] = { 0 };
+ uuid_t uuid = uuid_null;
+ int ret = 0;
if (test_bit(NVME_NS_DEAD, &ns->flags)) {
set_capacity(disk, 0);
return -ENODEV;
}
- ret = nvme_revalidate_ns(ns, &id);
- if (ret)
- return ret;
+ id = nvme_identify_ns(ctrl, ns->ns_id);
+ if (!id)
+ return -ENODEV;
- __nvme_revalidate_disk(disk, id);
- kfree(id);
+ if (id->ncap == 0) {
+ ret = -ENODEV;
+ goto out;
+ }
- return 0;
+ nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
+ if (!uuid_equal(&ns->uuid, &uuid) ||
+ memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
+ memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
+ dev_err(ctrl->device,
+ "identifiers changed for nsid %d\n", ns->ns_id);
+ ret = -ENODEV;
+ }
+
+out:
+ kfree(id);
+ return ret;
}
static char nvme_pr_type(enum pr_type type)
@@ -1447,7 +1462,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
ctrl->ctrl_config = NVME_CC_CSS_NVM;
ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+ ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
@@ -1460,7 +1475,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
- unsigned long timeout = jiffies + (shutdown_timeout * HZ);
+ unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
u32 csts;
int ret;
@@ -1509,6 +1524,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_write_cache(q, vwc, vwc);
}
+static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
+{
+ __le64 ts;
+ int ret;
+
+ if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
+ return 0;
+
+ ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
+ ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts),
+ NULL);
+ if (ret)
+ dev_warn_once(ctrl->device,
+ "could not set timestamp (%d)\n", ret);
+ return ret;
+}
+
static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
/*
@@ -1811,6 +1843,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
+ if (id->rtd3e) {
+ /* us -> s */
+ u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
+
+ ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
+ shutdown_timeout, 60);
+
+ if (ctrl->shutdown_timeout != shutdown_timeout)
+ dev_warn(ctrl->device,
+ "Shutdown timeout set to %u seconds\n",
+ ctrl->shutdown_timeout);
+ } else
+ ctrl->shutdown_timeout = shutdown_timeout;
+
ctrl->npss = id->npss;
ctrl->apsta = id->apsta;
prev_apst_enabled = ctrl->apst_enabled;
@@ -1863,6 +1909,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
+
+ ret = nvme_configure_timestamp(ctrl);
+ if (ret < 0)
+ return ret;
ret = nvme_configure_directives(ctrl);
if (ret < 0)
@@ -2318,9 +2368,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
- if (nvme_revalidate_ns(ns, &id))
+ id = nvme_identify_ns(ctrl, nsid);
+ if (!id)
goto out_free_queue;
+ if (id->ncap == 0)
+ goto out_free_id;
+
+ nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
+
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
@@ -2541,6 +2597,71 @@ static void nvme_async_event_work(struct work_struct *work)
spin_unlock_irq(&ctrl->lock);
}
+static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
+{
+
+ u32 csts;
+
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
+ return false;
+
+ if (csts == ~0)
+ return false;
+
+ return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP));
+}
+
+static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
+{
+ struct nvme_command c = { };
+ struct nvme_fw_slot_info_log *log;
+
+ log = kmalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ return;
+
+ c.common.opcode = nvme_admin_get_log_page;
+ c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
+ c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
+
+ if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
+ dev_warn(ctrl->device,
+ "Get FW SLOT INFO log error\n");
+ kfree(log);
+}
+
+static void nvme_fw_act_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(work,
+ struct nvme_ctrl, fw_act_work);
+ unsigned long fw_act_timeout;
+
+ if (ctrl->mtfa)
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(ctrl->mtfa * 100);
+ else
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(admin_timeout * 1000);
+
+ nvme_stop_queues(ctrl);
+ while (nvme_ctrl_pp_status(ctrl)) {
+ if (time_after(jiffies, fw_act_timeout)) {
+ dev_warn(ctrl->device,
+ "Fw activation timeout, reset controller\n");
+ nvme_reset_ctrl(ctrl);
+ break;
+ }
+ msleep(100);
+ }
+
+ if (ctrl->state != NVME_CTRL_LIVE)
+ return;
+
+ nvme_start_queues(ctrl);
+ /* read FW slot informationi to clear the AER*/
+ nvme_get_fw_slot_info(ctrl);
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res)
{
@@ -2567,6 +2688,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
dev_info(ctrl->device, "rescanning\n");
nvme_queue_scan(ctrl);
break;
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ schedule_work(&ctrl->fw_act_work);
+ break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -2614,6 +2738,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
+ cancel_work_sync(&ctrl->fw_act_work);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
@@ -2677,6 +2802,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
+ INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
ret = nvme_set_instance(ctrl);
if (ret)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5f5cd306f76d..47307752dc65 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -22,7 +22,7 @@
#include "fabrics.h"
static LIST_HEAD(nvmf_transports);
-static DEFINE_MUTEX(nvmf_transports_mutex);
+static DECLARE_RWSEM(nvmf_transports_rwsem);
static LIST_HEAD(nvmf_hosts);
static DEFINE_MUTEX(nvmf_hosts_mutex);
@@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void)
kref_init(&host->ref);
snprintf(host->nqn, NVMF_NQN_SIZE,
- "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
+ "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
mutex_lock(&nvmf_hosts_mutex);
list_add_tail(&host->list, &nvmf_hosts);
@@ -495,9 +495,9 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops)
if (!ops->create_ctrl)
return -EINVAL;
- mutex_lock(&nvmf_transports_mutex);
+ down_write(&nvmf_transports_rwsem);
list_add_tail(&ops->entry, &nvmf_transports);
- mutex_unlock(&nvmf_transports_mutex);
+ up_write(&nvmf_transports_rwsem);
return 0;
}
@@ -514,9 +514,9 @@ EXPORT_SYMBOL_GPL(nvmf_register_transport);
*/
void nvmf_unregister_transport(struct nvmf_transport_ops *ops)
{
- mutex_lock(&nvmf_transports_mutex);
+ down_write(&nvmf_transports_rwsem);
list_del(&ops->entry);
- mutex_unlock(&nvmf_transports_mutex);
+ up_write(&nvmf_transports_rwsem);
}
EXPORT_SYMBOL_GPL(nvmf_unregister_transport);
@@ -525,7 +525,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
{
struct nvmf_transport_ops *ops;
- lockdep_assert_held(&nvmf_transports_mutex);
+ lockdep_assert_held(&nvmf_transports_rwsem);
list_for_each_entry(ops, &nvmf_transports, entry) {
if (strcmp(ops->name, opts->transport) == 0)
@@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
if (uuid_parse(p, &hostid)) {
+ pr_err("Invalid hostid %s\n", p);
ret = -EINVAL;
goto out;
}
@@ -850,7 +851,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_free_opts;
opts->mask &= ~NVMF_REQUIRED_OPTS;
- mutex_lock(&nvmf_transports_mutex);
+ down_read(&nvmf_transports_rwsem);
ops = nvmf_lookup_transport(opts);
if (!ops) {
pr_info("no handler found for transport %s.\n",
@@ -877,16 +878,16 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
dev_warn(ctrl->device,
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subnqn);
- mutex_unlock(&nvmf_transports_mutex);
+ up_read(&nvmf_transports_rwsem);
ctrl->ops->delete_ctrl(ctrl);
return ERR_PTR(-EINVAL);
}
- mutex_unlock(&nvmf_transports_mutex);
+ up_read(&nvmf_transports_rwsem);
return ctrl;
out_unlock:
- mutex_unlock(&nvmf_transports_mutex);
+ up_read(&nvmf_transports_rwsem);
out_free_opts:
nvmf_free_options(opts);
return ERR_PTR(ret);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5c2a08ef08ba..d2e882c0f496 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
+static void
+nvme_fc_free_lport(struct kref *ref)
+{
+ struct nvme_fc_lport *lport =
+ container_of(ref, struct nvme_fc_lport, ref);
+ unsigned long flags;
+
+ WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
+ WARN_ON(!list_empty(&lport->endp_list));
+
+ /* remove from transport list */
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+ list_del(&lport->port_list);
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ /* let the LLDD know we've finished tearing it down */
+ lport->ops->localport_delete(&lport->localport);
+
+ ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
+ ida_destroy(&lport->endp_cnt);
+
+ put_device(lport->dev);
+
+ kfree(lport);
+}
+
+static void
+nvme_fc_lport_put(struct nvme_fc_lport *lport)
+{
+ kref_put(&lport->ref, nvme_fc_free_lport);
+}
+
+static int
+nvme_fc_lport_get(struct nvme_fc_lport *lport)
+{
+ return kref_get_unless_zero(&lport->ref);
+}
+
+
+static struct nvme_fc_lport *
+nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
+{
+ struct nvme_fc_lport *lport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
+ if (lport->localport.node_name != pinfo->node_name ||
+ lport->localport.port_name != pinfo->port_name)
+ continue;
+
+ if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
+ lport = ERR_PTR(-EEXIST);
+ goto out_done;
+ }
+
+ if (!nvme_fc_lport_get(lport)) {
+ /*
+ * fails if ref cnt already 0. If so,
+ * act as if lport already deleted
+ */
+ lport = NULL;
+ goto out_done;
+ }
+
+ /* resume the lport */
+
+ lport->localport.port_role = pinfo->port_role;
+ lport->localport.port_id = pinfo->port_id;
+ lport->localport.port_state = FC_OBJSTATE_ONLINE;
+
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ return lport;
+ }
+
+ lport = NULL;
+
+out_done:
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ return lport;
+}
/**
* nvme_fc_register_localport - transport entry point called by an
@@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
goto out_reghost_failed;
}
+ /*
+ * look to see if there is already a localport that had been
+ * deregistered and in the process of waiting for all the
+ * references to fully be removed. If the references haven't
+ * expired, we can simply re-enable the localport. Remoteports
+ * and controller reconnections should resume naturally.
+ */
+ newrec = nvme_fc_attach_to_unreg_lport(pinfo);
+
+ /* found an lport, but something about its state is bad */
+ if (IS_ERR(newrec)) {
+ ret = PTR_ERR(newrec);
+ goto out_reghost_failed;
+
+ /* found existing lport, which was resumed */
+ } else if (newrec) {
+ *portptr = &newrec->localport;
+ return 0;
+ }
+
+ /* nothing found - allocate a new localport struct */
+
newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
GFP_KERNEL);
if (!newrec) {
@@ -310,44 +416,6 @@ out_reghost_failed:
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
-static void
-nvme_fc_free_lport(struct kref *ref)
-{
- struct nvme_fc_lport *lport =
- container_of(ref, struct nvme_fc_lport, ref);
- unsigned long flags;
-
- WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
- WARN_ON(!list_empty(&lport->endp_list));
-
- /* remove from transport list */
- spin_lock_irqsave(&nvme_fc_lock, flags);
- list_del(&lport->port_list);
- spin_unlock_irqrestore(&nvme_fc_lock, flags);
-
- /* let the LLDD know we've finished tearing it down */
- lport->ops->localport_delete(&lport->localport);
-
- ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
- ida_destroy(&lport->endp_cnt);
-
- put_device(lport->dev);
-
- kfree(lport);
-}
-
-static void
-nvme_fc_lport_put(struct nvme_fc_lport *lport)
-{
- kref_put(&lport->ref, nvme_fc_free_lport);
-}
-
-static int
-nvme_fc_lport_get(struct nvme_fc_lport *lport)
-{
- return kref_get_unless_zero(&lport->ref);
-}
-
/**
* nvme_fc_unregister_localport - transport entry point called by an
* LLDD to deregister/remove a previously
@@ -2168,7 +2236,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
.complete = nvme_fc_complete_rq,
.init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
- .reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_hctx,
.poll = nvme_fc_poll,
.timeout = nvme_fc_timeout,
@@ -2269,7 +2336,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_io_queues(ctrl);
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+ ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
if (ret)
goto out_free_io_queues;
@@ -2655,7 +2722,6 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.complete = nvme_fc_complete_rq,
.init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
- .reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_admin_hctx,
.timeout = nvme_fc_timeout,
};
@@ -2733,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (ret)
goto out_free_queues;
+ ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index be8541335e31..c1a28569e843 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -643,17 +643,9 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
}
- if (!disk)
- goto submit;
-
- bio->bi_bdev = bdget_disk(disk, 0);
- if (!bio->bi_bdev) {
- ret = -ENODEV;
- goto err_meta;
- }
+ bio->bi_disk = disk;
}
-submit:
blk_execute_rq(q, NULL, rq, 0);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
@@ -673,11 +665,8 @@ err_meta:
if (meta_buf && meta_len)
dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
- if (bio) {
- if (disk && bio->bi_bdev)
- bdput(bio->bi_bdev);
+ if (bio)
blk_rq_unmap_user(bio);
- }
err_ppa:
if (ppa_buf && ppa_len)
dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8f2a168ddc01..a19a587d60ed 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -125,6 +125,7 @@ struct nvme_ctrl {
struct kref kref;
int instance;
struct blk_mq_tag_set *tagset;
+ struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
struct mutex namespaces_mutex;
struct device *device; /* char device */
@@ -142,6 +143,7 @@ struct nvme_ctrl {
u16 cntlid;
u32 ctrl_config;
+ u16 mtfa;
u32 queue_count;
u64 cap;
@@ -160,6 +162,7 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ unsigned int shutdown_timeout;
unsigned int kato;
bool subsystem;
unsigned long quirks;
@@ -167,6 +170,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct work_struct fw_act_work;
/* Power saving configuration */
u64 ps_max_latency_us;
@@ -207,13 +211,9 @@ struct nvme_ns {
bool ext;
u8 pi_type;
unsigned long flags;
- u16 noiob;
-
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
-
- u64 mode_select_num_blocks;
- u32 mode_select_block_len;
+ u16 noiob;
};
struct nvme_ctrl_ops {
@@ -314,13 +314,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
unsigned timeout, int qid, int at_head, int flags);
-int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen, u32 *result,
- unsigned timeout);
-int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
- void __user *ubuffer, unsigned bufflen,
- void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
- u32 *result, unsigned timeout);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 925467b31a33..198245faba6b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -109,6 +109,7 @@ struct nvme_dev {
/* host memory buffer support: */
u64 host_mem_size;
u32 nr_host_mem_descs;
+ dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
};
@@ -555,8 +556,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
int nprps, i;
length -= (page_size - offset);
- if (length <= 0)
+ if (length <= 0) {
+ iod->first_dma = 0;
return BLK_STS_OK;
+ }
dma_len -= (page_size - offset);
if (dma_len) {
@@ -666,7 +669,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
goto out_unmap;
- if (rq_data_dir(req))
+ if (req_op(req) == REQ_OP_WRITE)
nvme_dif_remap(req, nvme_dif_prep);
if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
@@ -694,7 +697,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
if (iod->nents) {
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
if (blk_integrity_rq(req)) {
- if (!rq_data_dir(req))
+ if (req_op(req) == REQ_OP_READ)
nvme_dif_remap(req, nvme_dif_complete);
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
@@ -1376,6 +1379,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
return -ENOMEM;
+ dev->ctrl.admin_tagset = &dev->admin_tagset;
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
if (IS_ERR(dev->ctrl.admin_q)) {
@@ -1565,16 +1569,10 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
{
- size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ u64 dma_addr = dev->host_mem_descs_dma;
struct nvme_command c;
- u64 dma_addr;
int ret;
- dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev->dev, dma_addr))
- return -ENOMEM;
-
memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_set_features;
c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
@@ -1591,7 +1589,6 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
"failed to set host mem (err %d, flags %#x).\n",
ret, bits);
}
- dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
return ret;
}
@@ -1609,7 +1606,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
kfree(dev->host_mem_desc_bufs);
dev->host_mem_desc_bufs = NULL;
- kfree(dev->host_mem_descs);
+ dma_free_coherent(dev->dev,
+ dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
+ dev->host_mem_descs, dev->host_mem_descs_dma);
dev->host_mem_descs = NULL;
}
@@ -1617,6 +1616,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
struct nvme_host_mem_buf_desc *descs;
u32 chunk_size, max_entries, len;
+ dma_addr_t descs_dma;
int i = 0;
void **bufs;
u64 size = 0, tmp;
@@ -1627,7 +1627,8 @@ retry:
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
max_entries = tmp;
- descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
+ &descs_dma, GFP_KERNEL);
if (!descs)
goto out;
@@ -1661,6 +1662,7 @@ retry:
dev->nr_host_mem_descs = i;
dev->host_mem_size = size;
dev->host_mem_descs = descs;
+ dev->host_mem_descs_dma = descs_dma;
dev->host_mem_desc_bufs = bufs;
return 0;
@@ -1674,7 +1676,8 @@ out_free_bufs:
kfree(bufs);
out_free_descs:
- kfree(descs);
+ dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
+ descs_dma);
out:
/* try a smaller chunk size if we failed early */
if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index da04df1af231..58983000964b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -19,6 +19,7 @@
#include <linux/string.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
+#include <linux/blk-mq-rdma.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -36,8 +37,6 @@
#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */
-#define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */
-
#define NVME_RDMA_MAX_SEGMENTS 256
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
@@ -151,6 +150,9 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static const struct blk_mq_ops nvme_rdma_mq_ops;
+static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
+
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
{
@@ -463,14 +465,10 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
ibdev = queue->device->dev;
/*
- * The admin queue is barely used once the controller is live, so don't
- * bother to spread it out.
+ * Spread I/O queues completion vectors according their queue index.
+ * Admin queues can always go on completion vector 0.
*/
- if (idx == 0)
- comp_vector = 0;
- else
- comp_vector = idx % ibdev->num_comp_vectors;
-
+ comp_vector = idx == 0 ? idx : idx - 1;
/* +1 for ib_stop_cq */
queue->ib_cq = ib_alloc_cq(ibdev, queue,
@@ -503,7 +501,7 @@ out_put_dev:
return ret;
}
-static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
+static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
int idx, size_t queue_size)
{
struct nvme_rdma_queue *queue;
@@ -561,60 +559,90 @@ out_destroy_cm_id:
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
+ if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
+ return;
+
rdma_disconnect(queue->cm_id);
ib_drain_qp(queue->qp);
}
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
+ if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
+ return;
+
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
}
-static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
+static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
{
- if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
- return;
- nvme_rdma_stop_queue(queue);
- nvme_rdma_free_queue(queue);
+ int i;
+
+ for (i = 1; i < ctrl->ctrl.queue_count; i++)
+ nvme_rdma_free_queue(&ctrl->queues[i]);
}
-static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int i;
for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
+ nvme_rdma_stop_queue(&ctrl->queues[i]);
+}
+
+static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
+{
+ int ret;
+
+ if (idx)
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
+ else
+ ret = nvmf_connect_admin_queue(&ctrl->ctrl);
+
+ if (!ret)
+ set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags);
+ else
+ dev_info(ctrl->ctrl.device,
+ "failed to connect queue: %d ret=%d\n", idx, ret);
+ return ret;
}
-static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
+static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int i, ret = 0;
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
- if (ret) {
- dev_info(ctrl->ctrl.device,
- "failed to connect i/o queue: %d\n", ret);
- goto out_free_queues;
- }
- set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
+ ret = nvme_rdma_start_queue(ctrl, i);
+ if (ret)
+ goto out_stop_queues;
}
return 0;
-out_free_queues:
- nvme_rdma_free_io_queues(ctrl);
+out_stop_queues:
+ for (i--; i >= 1; i--)
+ nvme_rdma_stop_queue(&ctrl->queues[i]);
return ret;
}
-static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
+static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ struct ib_device *ibdev = ctrl->device->dev;
unsigned int nr_io_queues;
int i, ret;
nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+
+ /*
+ * we map queues according to the device irq vectors for
+ * optimal locality so we don't need more queues than
+ * completion vectors.
+ */
+ nr_io_queues = min_t(unsigned int, nr_io_queues,
+ ibdev->num_comp_vectors);
+
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
return ret;
@@ -627,32 +655,230 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
"creating %d I/O queues.\n", nr_io_queues);
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvme_rdma_init_queue(ctrl, i,
- ctrl->ctrl.opts->queue_size);
- if (ret) {
- dev_info(ctrl->ctrl.device,
- "failed to initialize i/o queue: %d\n", ret);
+ ret = nvme_rdma_alloc_queue(ctrl, i,
+ ctrl->ctrl.sqsize + 1);
+ if (ret)
goto out_free_queues;
- }
}
return 0;
out_free_queues:
for (i--; i >= 1; i--)
- nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
+ nvme_rdma_free_queue(&ctrl->queues[i]);
return ret;
}
-static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct blk_mq_tag_set *set = admin ?
+ &ctrl->admin_tag_set : &ctrl->tag_set;
+
+ blk_mq_free_tag_set(set);
+ nvme_rdma_dev_put(ctrl->device);
+}
+
+static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
+ bool admin)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct blk_mq_tag_set *set;
+ int ret;
+
+ if (admin) {
+ set = &ctrl->admin_tag_set;
+ memset(set, 0, sizeof(*set));
+ set->ops = &nvme_rdma_admin_mq_ops;
+ set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
+ set->reserved_tags = 2; /* connect + keep-alive */
+ set->numa_node = NUMA_NO_NODE;
+ set->cmd_size = sizeof(struct nvme_rdma_request) +
+ SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ set->driver_data = ctrl;
+ set->nr_hw_queues = 1;
+ set->timeout = ADMIN_TIMEOUT;
+ } else {
+ set = &ctrl->tag_set;
+ memset(set, 0, sizeof(*set));
+ set->ops = &nvme_rdma_mq_ops;
+ set->queue_depth = nctrl->opts->queue_size;
+ set->reserved_tags = 1; /* fabric connect */
+ set->numa_node = NUMA_NO_NODE;
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->cmd_size = sizeof(struct nvme_rdma_request) +
+ SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ set->driver_data = ctrl;
+ set->nr_hw_queues = nctrl->queue_count - 1;
+ set->timeout = NVME_IO_TIMEOUT;
+ }
+
+ ret = blk_mq_alloc_tag_set(set);
+ if (ret)
+ goto out;
+
+ /*
+ * We need a reference on the device as long as the tag_set is alive,
+ * as the MRs in the request structures need a valid ib_device.
+ */
+ ret = nvme_rdma_dev_get(ctrl->device);
+ if (!ret) {
+ ret = -EINVAL;
+ goto out_free_tagset;
+ }
+
+ return set;
+
+out_free_tagset:
+ blk_mq_free_tag_set(set);
+out:
+ return ERR_PTR(ret);
+}
+
+static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
+ bool remove)
{
nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
- nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
- blk_cleanup_queue(ctrl->ctrl.admin_q);
- blk_mq_free_tag_set(&ctrl->admin_tag_set);
- nvme_rdma_dev_put(ctrl->device);
+ nvme_rdma_stop_queue(&ctrl->queues[0]);
+ if (remove) {
+ blk_cleanup_queue(ctrl->ctrl.admin_q);
+ nvme_rdma_free_tagset(&ctrl->ctrl, true);
+ }
+ nvme_rdma_free_queue(&ctrl->queues[0]);
+}
+
+static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
+ bool new)
+{
+ int error;
+
+ error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
+ if (error)
+ return error;
+
+ ctrl->device = ctrl->queues[0].device;
+
+ ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+ ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+
+ if (new) {
+ ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
+ if (IS_ERR(ctrl->ctrl.admin_tagset))
+ goto out_free_queue;
+
+ ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+ if (IS_ERR(ctrl->ctrl.admin_q)) {
+ error = PTR_ERR(ctrl->ctrl.admin_q);
+ goto out_free_tagset;
+ }
+ } else {
+ error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
+ nvme_rdma_reinit_request);
+ if (error)
+ goto out_free_queue;
+ }
+
+ error = nvme_rdma_start_queue(ctrl, 0);
+ if (error)
+ goto out_cleanup_queue;
+
+ error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+ &ctrl->ctrl.cap);
+ if (error) {
+ dev_err(ctrl->ctrl.device,
+ "prop_get NVME_REG_CAP failed\n");
+ goto out_cleanup_queue;
+ }
+
+ ctrl->ctrl.sqsize =
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
+
+ error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ if (error)
+ goto out_cleanup_queue;
+
+ ctrl->ctrl.max_hw_sectors =
+ (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
+
+ error = nvme_init_identify(&ctrl->ctrl);
+ if (error)
+ goto out_cleanup_queue;
+
+ error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
+ &ctrl->async_event_sqe, sizeof(struct nvme_command),
+ DMA_TO_DEVICE);
+ if (error)
+ goto out_cleanup_queue;
+
+ return 0;
+
+out_cleanup_queue:
+ if (new)
+ blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_free_tagset:
+ if (new)
+ nvme_rdma_free_tagset(&ctrl->ctrl, true);
+out_free_queue:
+ nvme_rdma_free_queue(&ctrl->queues[0]);
+ return error;
+}
+
+static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
+ bool remove)
+{
+ nvme_rdma_stop_io_queues(ctrl);
+ if (remove) {
+ blk_cleanup_queue(ctrl->ctrl.connect_q);
+ nvme_rdma_free_tagset(&ctrl->ctrl, false);
+ }
+ nvme_rdma_free_io_queues(ctrl);
+}
+
+static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
+{
+ int ret;
+
+ ret = nvme_rdma_alloc_io_queues(ctrl);
+ if (ret)
+ return ret;
+
+ if (new) {
+ ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
+ if (IS_ERR(ctrl->ctrl.tagset))
+ goto out_free_io_queues;
+
+ ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
+ if (IS_ERR(ctrl->ctrl.connect_q)) {
+ ret = PTR_ERR(ctrl->ctrl.connect_q);
+ goto out_free_tag_set;
+ }
+ } else {
+ ret = blk_mq_reinit_tagset(&ctrl->tag_set,
+ nvme_rdma_reinit_request);
+ if (ret)
+ goto out_free_io_queues;
+
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+ ctrl->ctrl.queue_count - 1);
+ }
+
+ ret = nvme_rdma_start_io_queues(ctrl);
+ if (ret)
+ goto out_cleanup_connect_q;
+
+ return 0;
+
+out_cleanup_connect_q:
+ if (new)
+ blk_cleanup_queue(ctrl->ctrl.connect_q);
+out_free_tag_set:
+ if (new)
+ nvme_rdma_free_tagset(&ctrl->ctrl, false);
+out_free_io_queues:
+ nvme_rdma_free_io_queues(ctrl);
+ return ret;
}
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -701,45 +927,18 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
++ctrl->ctrl.nr_reconnects;
- if (ctrl->ctrl.queue_count > 1) {
- nvme_rdma_free_io_queues(ctrl);
-
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
- if (ret)
- goto requeue;
- }
-
- nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
-
- ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set);
- if (ret)
- goto requeue;
-
- ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
- if (ret)
- goto requeue;
-
- ret = nvmf_connect_admin_queue(&ctrl->ctrl);
- if (ret)
- goto requeue;
-
- set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
+ if (ctrl->ctrl.queue_count > 1)
+ nvme_rdma_destroy_io_queues(ctrl, false);
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
+ nvme_rdma_destroy_admin_queue(ctrl, false);
+ ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto requeue;
if (ctrl->ctrl.queue_count > 1) {
- ret = nvme_rdma_init_io_queues(ctrl);
- if (ret)
- goto requeue;
-
- ret = nvme_rdma_connect_io_queues(ctrl);
+ ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
goto requeue;
-
- blk_mq_update_nr_hw_queues(&ctrl->tag_set,
- ctrl->ctrl.queue_count - 1);
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -762,16 +961,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
- int i;
nvme_stop_ctrl(&ctrl->ctrl);
- for (i = 0; i < ctrl->ctrl.queue_count; i++)
- clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
-
- if (ctrl->ctrl.queue_count > 1)
+ if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
+ nvme_rdma_stop_io_queues(ctrl);
+ }
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_rdma_stop_queue(&ctrl->queues[0]);
/* We must take care of fastfail/requeue all our inflight requests */
if (ctrl->ctrl.queue_count > 1)
@@ -856,7 +1054,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
if (req->mr->need_inval) {
res = nvme_rdma_inv_rkey(queue, req);
- if (res < 0) {
+ if (unlikely(res < 0)) {
dev_err(ctrl->ctrl.device,
"Queueing INV WR for rkey %#x failed (%d)\n",
req->mr->rkey, res);
@@ -920,8 +1118,12 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
int nr;
- nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE);
- if (nr < count) {
+ /*
+ * Align the MR to a 4K page size to match the ctrl page size and
+ * the block virtual boundary.
+ */
+ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+ if (unlikely(nr < count)) {
if (nr < 0)
return nr;
return -EINVAL;
@@ -1057,7 +1259,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
first = &wr;
ret = ib_post_send(queue->qp, first, &bad_wr);
- if (ret) {
+ if (unlikely(ret)) {
dev_err(queue->ctrl->ctrl.device,
"%s failed with error code %d\n", __func__, ret);
}
@@ -1083,7 +1285,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
wr.num_sge = 1;
ret = ib_post_recv(queue->qp, &wr, &bad_wr);
- if (ret) {
+ if (unlikely(ret)) {
dev_err(queue->ctrl->ctrl.device,
"%s failed with error code %d\n", __func__, ret);
}
@@ -1443,7 +1645,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
err = nvme_rdma_map_data(queue, rq, c);
- if (err < 0) {
+ if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
@@ -1457,7 +1659,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
flush = true;
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
- if (err) {
+ if (unlikely(err)) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
@@ -1498,15 +1700,22 @@ static void nvme_rdma_complete_rq(struct request *rq)
nvme_complete_rq(rq);
}
+static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
+
+ return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
+}
+
static const struct blk_mq_ops nvme_rdma_mq_ops = {
.queue_rq = nvme_rdma_queue_rq,
.complete = nvme_rdma_complete_rq,
.init_request = nvme_rdma_init_request,
.exit_request = nvme_rdma_exit_request,
- .reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_hctx,
.poll = nvme_rdma_poll,
.timeout = nvme_rdma_timeout,
+ .map_queues = nvme_rdma_map_queues,
};
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
@@ -1514,103 +1723,11 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.complete = nvme_rdma_complete_rq,
.init_request = nvme_rdma_init_request,
.exit_request = nvme_rdma_exit_request,
- .reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_admin_hctx,
.timeout = nvme_rdma_timeout,
};
-static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
-{
- int error;
-
- error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
- if (error)
- return error;
-
- ctrl->device = ctrl->queues[0].device;
-
- /*
- * We need a reference on the device as long as the tag_set is alive,
- * as the MRs in the request structures need a valid ib_device.
- */
- error = -EINVAL;
- if (!nvme_rdma_dev_get(ctrl->device))
- goto out_free_queue;
-
- ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
- ctrl->device->dev->attrs.max_fast_reg_page_list_len);
-
- memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
- ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops;
- ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
- ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
- ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
- ctrl->admin_tag_set.driver_data = ctrl;
- ctrl->admin_tag_set.nr_hw_queues = 1;
- ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
-
- error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
- if (error)
- goto out_put_dev;
-
- ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
- if (IS_ERR(ctrl->ctrl.admin_q)) {
- error = PTR_ERR(ctrl->ctrl.admin_q);
- goto out_free_tagset;
- }
-
- error = nvmf_connect_admin_queue(&ctrl->ctrl);
- if (error)
- goto out_cleanup_queue;
-
- set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
-
- error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
- &ctrl->ctrl.cap);
- if (error) {
- dev_err(ctrl->ctrl.device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_cleanup_queue;
- }
-
- ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
-
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
- if (error)
- goto out_cleanup_queue;
-
- ctrl->ctrl.max_hw_sectors =
- (ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9);
-
- error = nvme_init_identify(&ctrl->ctrl);
- if (error)
- goto out_cleanup_queue;
-
- error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
- &ctrl->async_event_sqe, sizeof(struct nvme_command),
- DMA_TO_DEVICE);
- if (error)
- goto out_cleanup_queue;
-
- return 0;
-
-out_cleanup_queue:
- blk_cleanup_queue(ctrl->ctrl.admin_q);
-out_free_tagset:
- /* disconnect and drain the queue before freeing the tagset */
- nvme_rdma_stop_queue(&ctrl->queues[0]);
- blk_mq_free_tag_set(&ctrl->admin_tag_set);
-out_put_dev:
- nvme_rdma_dev_put(ctrl->device);
-out_free_queue:
- nvme_rdma_free_queue(&ctrl->queues[0]);
- return error;
-}
-
-static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->reconnect_work);
@@ -1619,33 +1736,26 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
- nvme_rdma_free_io_queues(ctrl);
+ nvme_rdma_destroy_io_queues(ctrl, shutdown);
}
- if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
+ if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl);
+ else
+ nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
- nvme_rdma_destroy_admin_queue(ctrl);
+ nvme_rdma_destroy_admin_queue(ctrl, shutdown);
}
-static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
{
- nvme_stop_ctrl(&ctrl->ctrl);
nvme_remove_namespaces(&ctrl->ctrl);
- if (shutdown)
- nvme_rdma_shutdown_ctrl(ctrl);
-
+ nvme_rdma_shutdown_ctrl(ctrl, true);
nvme_uninit_ctrl(&ctrl->ctrl);
- if (ctrl->ctrl.tagset) {
- blk_cleanup_queue(ctrl->ctrl.connect_q);
- blk_mq_free_tag_set(&ctrl->tag_set);
- nvme_rdma_dev_put(ctrl->device);
- }
-
nvme_put_ctrl(&ctrl->ctrl);
}
@@ -1654,7 +1764,8 @@ static void nvme_rdma_del_ctrl_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- __nvme_rdma_remove_ctrl(ctrl, true);
+ nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_rdma_remove_ctrl(ctrl);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1686,14 +1797,6 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
return ret;
}
-static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
-{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, delete_work);
-
- __nvme_rdma_remove_ctrl(ctrl, false);
-}
-
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
@@ -1702,30 +1805,16 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
bool changed;
nvme_stop_ctrl(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl);
+ nvme_rdma_shutdown_ctrl(ctrl, false);
- ret = nvme_rdma_configure_admin_queue(ctrl);
- if (ret) {
- /* ctrl is already shutdown, just remove the ctrl */
- INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work);
- goto del_dead_ctrl;
- }
+ ret = nvme_rdma_configure_admin_queue(ctrl, false);
+ if (ret)
+ goto out_fail;
if (ctrl->ctrl.queue_count > 1) {
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
- if (ret)
- goto del_dead_ctrl;
-
- ret = nvme_rdma_init_io_queues(ctrl);
- if (ret)
- goto del_dead_ctrl;
-
- ret = nvme_rdma_connect_io_queues(ctrl);
+ ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
- goto del_dead_ctrl;
-
- blk_mq_update_nr_hw_queues(&ctrl->tag_set,
- ctrl->ctrl.queue_count - 1);
+ goto out_fail;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -1735,10 +1824,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
-del_dead_ctrl:
- /* Deleting this dead controller... */
+out_fail:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
+ nvme_rdma_remove_ctrl(ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1754,62 +1842,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.get_address = nvmf_get_address,
};
-static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
-{
- int ret;
-
- ret = nvme_rdma_init_io_queues(ctrl);
- if (ret)
- return ret;
-
- /*
- * We need a reference on the device as long as the tag_set is alive,
- * as the MRs in the request structures need a valid ib_device.
- */
- ret = -EINVAL;
- if (!nvme_rdma_dev_get(ctrl->device))
- goto out_free_io_queues;
-
- memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
- ctrl->tag_set.ops = &nvme_rdma_mq_ops;
- ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
- ctrl->tag_set.reserved_tags = 1; /* fabric connect */
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
- ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
- ctrl->tag_set.driver_data = ctrl;
- ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
- ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
-
- ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
- if (ret)
- goto out_put_dev;
- ctrl->ctrl.tagset = &ctrl->tag_set;
-
- ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
- if (IS_ERR(ctrl->ctrl.connect_q)) {
- ret = PTR_ERR(ctrl->ctrl.connect_q);
- goto out_free_tag_set;
- }
-
- ret = nvme_rdma_connect_io_queues(ctrl);
- if (ret)
- goto out_cleanup_connect_q;
-
- return 0;
-
-out_cleanup_connect_q:
- blk_cleanup_queue(ctrl->ctrl.connect_q);
-out_free_tag_set:
- blk_mq_free_tag_set(&ctrl->tag_set);
-out_put_dev:
- nvme_rdma_dev_put(ctrl->device);
-out_free_io_queues:
- nvme_rdma_free_io_queues(ctrl);
- return ret;
-}
-
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -1867,7 +1899,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl->queues)
goto out_uninit_ctrl;
- ret = nvme_rdma_configure_admin_queue(ctrl);
+ ret = nvme_rdma_configure_admin_queue(ctrl, true);
if (ret)
goto out_kfree_queues;
@@ -1902,7 +1934,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
}
if (opts->nr_io_queues) {
- ret = nvme_rdma_create_io_queues(ctrl);
+ ret = nvme_rdma_configure_io_queues(ctrl, true);
if (ret)
goto out_remove_admin_queue;
}
@@ -1924,7 +1956,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
return &ctrl->ctrl;
out_remove_admin_queue:
- nvme_rdma_destroy_admin_queue(ctrl);
+ nvme_rdma_destroy_admin_queue(ctrl, true);
out_kfree_queues:
kfree(ctrl->queues);
out_uninit_ctrl:
@@ -1946,10 +1978,6 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
.create_ctrl = nvme_rdma_create_ctrl,
};
-static void nvme_rdma_add_one(struct ib_device *ib_device)
-{
-}
-
static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvme_rdma_ctrl *ctrl;
@@ -1971,7 +1999,6 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
static struct ib_client nvme_rdma_ib_client = {
.name = "nvme_rdma",
- .add = nvme_rdma_add_one,
.remove = nvme_rdma_remove_one
};
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index a53bb6635b83..c4a0bf36e752 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
u16 status;
WARN_ON(req == NULL || slog == NULL);
- if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
+ if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
status = nvmet_get_smart_log_all(req, slog);
else
status = nvmet_get_smart_log_nsid(req, slog);
@@ -168,15 +168,6 @@ out:
nvmet_req_complete(req, status);
}
-static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len)
-{
- int len = min(src_len, dst_len);
-
- memcpy(dst, src, len);
- if (dst_len > len)
- memset(dst + len, ' ', dst_len - len);
-}
-
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -196,8 +187,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
- copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
- copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ memcpy_and_pad(id->fr, sizeof(id->fr),
+ UTS_RELEASE, strlen(UTS_RELEASE), ' ');
id->rab = 6;
@@ -451,7 +443,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u32 val32;
u16 status = 0;
- switch (cdw10 & 0xf) {
+ switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
nvmet_set_result(req,
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
@@ -461,6 +453,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_set_result(req, req->sq->ctrl->kato);
break;
+ case NVME_FEAT_HOST_ID:
+ status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
@@ -475,7 +470,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
u16 status = 0;
- switch (cdw10 & 0xf) {
+ switch (cdw10 & 0xff) {
/*
* These features are mandatory in the spec, but we don't
* have a useful way to implement them. We'll eventually
@@ -509,6 +504,16 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
case NVME_FEAT_KATO:
nvmet_set_result(req, req->sq->ctrl->kato * 1000);
break;
+ case NVME_FEAT_HOST_ID:
+ /* need 128-bit host identifier flag */
+ if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
+ sizeof(req->sq->ctrl->hostid));
+ break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 0a0067e771f5..b6aeb1d70951 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -444,7 +444,7 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
goto out;
ret = -EINVAL;
- if (nsid == 0 || nsid == 0xffffffff)
+ if (nsid == 0 || nsid == NVME_NSID_ALL)
goto out;
ret = -ENOMEM;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index f4b02bb4a1a8..7c23eaf8e563 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -538,37 +538,37 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit);
static inline bool nvmet_cc_en(u32 cc)
{
- return cc & 0x1;
+ return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}
static inline u8 nvmet_cc_css(u32 cc)
{
- return (cc >> 4) & 0x7;
+ return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}
static inline u8 nvmet_cc_mps(u32 cc)
{
- return (cc >> 7) & 0xf;
+ return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}
static inline u8 nvmet_cc_ams(u32 cc)
{
- return (cc >> 11) & 0x7;
+ return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}
static inline u8 nvmet_cc_shn(u32 cc)
{
- return (cc >> 14) & 0x3;
+ return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}
static inline u8 nvmet_cc_iosqes(u32 cc)
{
- return (cc >> 16) & 0xf;
+ return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}
static inline u8 nvmet_cc_iocqes(u32 cc)
{
- return (cc >> 20) & 0xf;
+ return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
@@ -749,6 +749,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
hostnqn, subsysnqn);
req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
+ status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
goto out_put_subsystem;
}
up_read(&nvmet_config_sem);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 3cc17269504b..859a66725291 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -154,6 +154,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
le32_to_cpu(c->kato), &ctrl);
if (status)
goto out;
+ uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
if (status) {
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 309c84aa7595..421e43bf1dd7 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -58,7 +58,8 @@ struct nvmet_fc_ls_iod {
struct work_struct work;
} __aligned(sizeof(unsigned long long));
-#define NVMET_FC_MAX_KB_PER_XFR 256
+#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
+#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE)
enum nvmet_fcp_datadir {
NVMET_FCP_NODATA,
@@ -74,9 +75,7 @@ struct nvmet_fc_fcp_iod {
struct nvme_fc_ersp_iu rspiubuf;
dma_addr_t rspdma;
struct scatterlist *data_sg;
- struct scatterlist *next_sg;
int data_sg_cnt;
- u32 next_sg_offset;
u32 total_length;
u32 offset;
enum nvmet_fcp_datadir io_dir;
@@ -112,6 +111,7 @@ struct nvmet_fc_tgtport {
struct ida assoc_cnt;
struct nvmet_port *port;
struct kref ref;
+ u32 max_sg_cnt;
};
struct nvmet_fc_defer_fcp_req {
@@ -994,6 +994,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
INIT_LIST_HEAD(&newrec->assoc_list);
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
+ newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS,
+ template->max_sgl_segments);
ret = nvmet_fc_alloc_ls_iodlist(newrec);
if (ret) {
@@ -1866,51 +1868,23 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod, u8 op)
{
struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
- struct scatterlist *sg, *datasg;
unsigned long flags;
- u32 tlen, sg_off;
+ u32 tlen;
int ret;
fcpreq->op = op;
fcpreq->offset = fod->offset;
fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC;
- tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024),
+
+ tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE,
(fod->total_length - fod->offset));
- tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE);
- tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments
- * PAGE_SIZE);
fcpreq->transfer_length = tlen;
fcpreq->transferred_length = 0;
fcpreq->fcp_error = 0;
fcpreq->rsplen = 0;
- fcpreq->sg_cnt = 0;
-
- datasg = fod->next_sg;
- sg_off = fod->next_sg_offset;
-
- for (sg = fcpreq->sg ; tlen; sg++) {
- *sg = *datasg;
- if (sg_off) {
- sg->offset += sg_off;
- sg->length -= sg_off;
- sg->dma_address += sg_off;
- sg_off = 0;
- }
- if (tlen < sg->length) {
- sg->length = tlen;
- fod->next_sg = datasg;
- fod->next_sg_offset += tlen;
- } else if (tlen == sg->length) {
- fod->next_sg_offset = 0;
- fod->next_sg = sg_next(datasg);
- } else {
- fod->next_sg_offset = 0;
- datasg = sg_next(datasg);
- }
- tlen -= sg->length;
- fcpreq->sg_cnt++;
- }
+ fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE];
+ fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE);
/*
* If the last READDATA request: check if LLDD supports
@@ -2225,8 +2199,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->req.sg = fod->data_sg;
fod->req.sg_cnt = fod->data_sg_cnt;
fod->offset = 0;
- fod->next_sg = fod->data_sg;
- fod->next_sg_offset = 0;
if (fod->io_dir == NVMET_FCP_WRITE) {
/* pull the data over before invoking nvmet layer */
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1bb9d5b311b1..1cb9847ec261 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -193,9 +193,6 @@ out_free_options:
#define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN)
-#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \
- NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN)
-
static DEFINE_SPINLOCK(fcloop_lock);
static LIST_HEAD(fcloop_lports);
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 3b4d47a6abdb..0d4c23dc4532 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -68,7 +68,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
nvmet_inline_bio_init(req);
bio = &req->inline_bio;
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
@@ -80,7 +80,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
struct bio *prev = bio;
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio_set_op_attrs(bio, op, op_flags);
@@ -104,7 +104,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
nvmet_inline_bio_init(req);
bio = &req->inline_bio;
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 717ed7ddb2f6..92628c432926 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -375,6 +375,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (error)
goto out_free_sq;
+ ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
if (IS_ERR(ctrl->ctrl.admin_q)) {
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index e3b244c7e443..7d261ab894f4 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -115,6 +115,7 @@ struct nvmet_ctrl {
u32 cc;
u32 csts;
+ uuid_t hostid;
u16 cntlid;
u32 kato;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 56a4cba690b5..76d2bb793afe 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1510,10 +1510,6 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
.delete_ctrl = nvmet_rdma_delete_ctrl,
};
-static void nvmet_rdma_add_one(struct ib_device *ib_device)
-{
-}
-
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvmet_rdma_queue *queue;
@@ -1534,7 +1530,6 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
static struct ib_client nvmet_rdma_ib_client = {
.name = "nvmet_rdma",
- .add = nvmet_rdma_add_one,
.remove = nvmet_rdma_remove_one
};