summaryrefslogblamecommitdiffstats
path: root/hw/nvme/nvme.h
blob: 0711b9748c28a67d5551508f54223f7ab8a6f5f9 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
















                                                       

                      
 
                      
                       


                           
 
                                
                                
                                                         
 

                                                                 


                                           






                                             


                                                        
                                       


                              
                    
                            
                        









                                                         
                                                                     







                                                               



                                                    



























                                                                  
                   
                           




                  
                 











                               



                     






                              
                      
                       
                           
                      
                       
                       



                          
                     
 





                         






















                                           








                                                   

                                                              
                              



                                                              
                             

 




                                                                




                                                        








































































                                                                               
                                                   


                                         
 
                               
                                       


                         













                           




                                      

                                
                                
                                   
                                   
                                    
                                
                                
                                 
                               


                                   








                                  












                                                                           
                                                                            
                                                                         
                                                                           
                                                                         









                                                                      
                                                                  
                                                                       
                                                              
                                                                 
                                                               


                                                                        



                                                                  







                           

                        

                        

                                            












                                   

                        
                       

                                        

             



                                                











                                                   
                                   
                        
                           



                                 
             
 

                            
                      

                       
                        
                     
 
                       
                           
                          



                             
                           
                                 
                           
                           

                                                                                

                             
                                       

                                


                              
 
            






                          



                               
 




                                            

                      



                                                                    

                            
                              
                                                         




                             





                                     


                                             
               

                                 
                                  



                                                                         

           




                              
                                                                
 
                                              


                    
                               

 













                                                   








                                                 











                                                                     














                                                                      
                                                    
                                                               
                                                                 
                                                                



                                                                  
 
                           
/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch            <kbusch@kernel.org>
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
 *   Minwoo Im              <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];
    char        *serial;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;

    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    unsigned int nlbaf;
    size_t       lbasz;
    const uint32_t *iocs;
    uint8_t      csi;
    uint16_t     status;
    int          attached;
    uint8_t      pif;

    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;
    } features;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;
    void                    *opaque;
    NvmeCqe                 cqe;
    NvmeCmd                 cmd;
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUTimer   *timer;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUTimer   *timer;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_NVME_H */