diff mbox series

[v5,08/26] nvme: refactor device realization

Message ID 20200204095208.269131-9-k.jensen@samsung.com (mailing list archive)
State New, archived
Headers show
Series nvme: support NVMe v1.3d, SGLs and multiple namespaces | expand

Commit Message

Klaus Jensen Feb. 4, 2020, 9:51 a.m. UTC
This patch splits up nvme_realize into multiple individual functions,
each initializing a different subset of the device.

Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
---
 hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++-----------------
 hw/block/nvme.h |  21 ++++++
 2 files changed, 133 insertions(+), 63 deletions(-)

Comments

Maxim Levitsky Feb. 12, 2020, 9:27 a.m. UTC | #1
On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> This patch splits up nvme_realize into multiple individual functions,
> each initializing a different subset of the device.
> 
> Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> ---
>  hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++-----------------
>  hw/block/nvme.h |  21 ++++++
>  2 files changed, 133 insertions(+), 63 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index e1810260d40b..81514eaef63a 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -44,6 +44,7 @@
>  #include "nvme.h"
>  
>  #define NVME_SPEC_VER 0x00010201
> +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
>  
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>      do { \
> @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = {
>      },
>  };
>  
> -static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> +static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
>  {
> -    NvmeCtrl *n = NVME(pci_dev);
> -    NvmeIdCtrl *id = &n->id_ctrl;
> -
> -    int i;
> -    int64_t bs_size;
> -    uint8_t *pci_conf;
> -
> -    if (!n->params.num_queues) {
> -        error_setg(errp, "num_queues can't be zero");
> -        return;
> -    }
> +    NvmeParams *params = &n->params;
>  
>      if (!n->conf.blk) {
> -        error_setg(errp, "drive property not set");
> -        return;
> +        error_setg(errp, "nvme: block backend not configured");
> +        return 1;
As a matter of taste, negative values indicate error, and 0 is the success value.
In the Linux kernel this is even an official rule.
>      }
>  
> -    bs_size = blk_getlength(n->conf.blk);
> -    if (bs_size < 0) {
> -        error_setg(errp, "could not get backing file size");
> -        return;
> +    if (!params->serial) {
> +        error_setg(errp, "nvme: serial not configured");
> +        return 1;
>      }
>  
> -    if (!n->params.serial) {
> -        error_setg(errp, "serial property not set");
> -        return;
> +    if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
> +        error_setg(errp, "nvme: invalid queue configuration");
Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."?
> +        return 1;
>      }
> +
> +    return 0;
> +}
> +
> +static int nvme_init_blk(NvmeCtrl *n, Error **errp)
> +{
>      blkconf_blocksizes(&n->conf);
>      if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
> -                                       false, errp)) {
> -        return;
> +        false, errp)) {
> +        return 1;
>      }
>  
> -    pci_conf = pci_dev->config;
> -    pci_conf[PCI_INTERRUPT_PIN] = 1;
> -    pci_config_set_prog_interface(pci_dev->config, 0x2);
> -    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
> -    pcie_endpoint_cap_init(pci_dev, 0x80);
> +    return 0;
> +}
>  
> +static void nvme_init_state(NvmeCtrl *n)
> +{
>      n->num_namespaces = 1;
>      n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);

Isn't that wrong?
The first 4K of MMIO (0x1000) is the registers, and that is followed by the doorbells,
and each doorbell takes 8 bytes (assuming regular doorbell stride).
So n->params.num_queues + 1 should be the total number of queues, thus the 0x1004 should be 0x1000 IMHO.
I might be missing some rounding magic here though.

> -    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
> -
>      n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
>      n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
>      n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
> +}
>  
> -    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
> -                          "nvme", n->reg_size);
> +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
> +{
> +    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
It would be nice to have a #define for the CMB BAR number.
> +    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> +
> +    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> +    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> +    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> +    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> +    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> +    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
> +    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> +
> +    n->cmbloc = n->bar.cmbloc;
> +    n->cmbsz = n->bar.cmbsz;
> +
> +    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> +    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> +                            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> +    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
Same here although since you read it here from the controller register,
then maybe leave it as is. I prefer though for this kind of thing
to have a #define and use it everywhere. 

> +        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> +        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> +}
> +
> +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
> +{
> +    uint8_t *pci_conf = pci_dev->config;
> +
> +    pci_conf[PCI_INTERRUPT_PIN] = 1;
> +    pci_config_set_prog_interface(pci_conf, 0x2);
Nitpick: How about adding some #define for that as well?
(I know that this code is copied as is but still)
> +    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> +    pci_config_set_device_id(pci_conf, 0x5845);
> +    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
> +    pcie_endpoint_cap_init(pci_dev, 0x80);
> +
> +    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
> +        n->reg_size);

Code on split lines should start at the column right after the '('.
Now it's my turn to notice this - our checkpatch.pl doesn't check this,
and I can't explain how often I get burnt by this myself.

There are a *lot* of these issues; I pointed out some of them, but you should
check all the patches for this.


>      pci_register_bar(pci_dev, 0,
>          PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
>          &n->iomem);
Split line alignment issue here as well.
>      msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
>  
> +    if (n->params.cmb_size_mb) {
> +        nvme_init_cmb(n, pci_dev);
> +    }
> +}
> +
> +static void nvme_init_ctrl(NvmeCtrl *n)
> +{
> +    NvmeIdCtrl *id = &n->id_ctrl;
> +    NvmeParams *params = &n->params;
> +    uint8_t *pci_conf = n->parent_obj.config;
> +
>      id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
>      id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
>      strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
>      strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
> -    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
> +    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
>      id->rab = 6;
>      id->ieee[0] = 0x00;
>      id->ieee[1] = 0x02;
> @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
>  
>      n->bar.vs = NVME_SPEC_VER;
>      n->bar.intmc = n->bar.intms = 0;
> +}
>  
> -    if (n->params.cmb_size_mb) {
> +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> +{
> +    int64_t bs_size;
> +    NvmeIdNs *id_ns = &ns->id_ns;
>  
> -        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
> -        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> +    bs_size = blk_getlength(n->conf.blk);
> +    if (bs_size < 0) {
> +        error_setg_errno(errp, -bs_size, "blk_getlength");
> +        return 1;
> +    }
>  
> -        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> -        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> -        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> -        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> -        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> -        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> -        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> +    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> +    n->ns_size = bs_size;
>  
> -        n->cmbloc = n->bar.cmbloc;
> -        n->cmbsz = n->bar.cmbsz;
> +    id_ns->ncap = id_ns->nuse = id_ns->nsze =
> +        cpu_to_le64(nvme_ns_nlbas(n, ns));
I myself don't know how to align these splits to be honest.
I would just split this into multiple statements.
>  
> -        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> -        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> -                              "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> -        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
> -            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> -            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> +    return 0;
> +}
>  
> +static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> +{
> +    NvmeCtrl *n = NVME(pci_dev);
> +    Error *local_err = NULL;
> +    int i;
> +
> +    if (nvme_check_constraints(n, &local_err)) {
> +        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
Do we need that hint for the end user?
> +        return;
> +    }
> +
> +    nvme_init_state(n);
> +
> +    if (nvme_init_blk(n, &local_err)) {
> +        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
Same here
> +        return;
>      }
>  
>      for (i = 0; i < n->num_namespaces; i++) {
> -        NvmeNamespace *ns = &n->namespaces[i];
> -        NvmeIdNs *id_ns = &ns->id_ns;
> -        id_ns->nsfeat = 0;
> -        id_ns->nlbaf = 0;
> -        id_ns->flbas = 0;
> -        id_ns->mc = 0;
> -        id_ns->dpc = 0;
> -        id_ns->dps = 0;
> -        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> -        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
> -            cpu_to_le64(n->ns_size >>
> -                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
> +        if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
> +            error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
And here
> +            return;
> +        }
>      }
> +
> +    nvme_init_pci(n, pci_dev);
> +    nvme_init_ctrl(n);
>  }
>  
>  static void nvme_exit(PCIDevice *pci_dev)
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 9957c4a200e2..a867bdfabafd 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -65,6 +65,22 @@ typedef struct NvmeNamespace {
>      NvmeIdNs        id_ns;
>  } NvmeNamespace;
>  
> +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
> +{
It's not common to return a structure in C; usually a pointer is returned to
avoid copying. In this case this doesn't matter that much though.
> +    NvmeIdNs *id_ns = &ns->id_ns;
> +    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
> +}
> +
> +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
> +{
> +    return nvme_ns_lbaf(ns).ds;
> +}
> +
> +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
> +{
> +    return 1 << nvme_ns_lbads(ns);
> +}
> +
>  #define TYPE_NVME "nvme"
>  #define NVME(obj) \
>          OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> @@ -101,4 +117,9 @@ typedef struct NvmeCtrl {
>      NvmeIdCtrl      id_ctrl;
>  } NvmeCtrl;
>  
> +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    return n->ns_size >> nvme_ns_lbads(ns);
> +}
Unless you need all these functions in the future, this feels like
it is a bit verbose.

> +
>  #endif /* HW_NVME_H */


Best regards,
	Maxim Levitsky
Klaus Jensen March 16, 2020, 7:43 a.m. UTC | #2
On Feb 12 11:27, Maxim Levitsky wrote:
> On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > This patch splits up nvme_realize into multiple individual functions,
> > each initializing a different subset of the device.
> > 
> > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > ---
> >  hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++-----------------
> >  hw/block/nvme.h |  21 ++++++
> >  2 files changed, 133 insertions(+), 63 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index e1810260d40b..81514eaef63a 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -44,6 +44,7 @@
> >  #include "nvme.h"
> >  
> >  #define NVME_SPEC_VER 0x00010201
> > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
> >  
> >  #define NVME_GUEST_ERR(trace, fmt, ...) \
> >      do { \
> > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = {
> >      },
> >  };
> >  
> > -static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
> >  {
> > -    NvmeCtrl *n = NVME(pci_dev);
> > -    NvmeIdCtrl *id = &n->id_ctrl;
> > -
> > -    int i;
> > -    int64_t bs_size;
> > -    uint8_t *pci_conf;
> > -
> > -    if (!n->params.num_queues) {
> > -        error_setg(errp, "num_queues can't be zero");
> > -        return;
> > -    }
> > +    NvmeParams *params = &n->params;
> >  
> >      if (!n->conf.blk) {
> > -        error_setg(errp, "drive property not set");
> > -        return;
> > +        error_setg(errp, "nvme: block backend not configured");
> > +        return 1;
> As a matter of taste, negative values indicate error, and 0 is the success value.
> In Linux kernel this is even an official rule.
> >      }

Fixed.

> >  
> > -    bs_size = blk_getlength(n->conf.blk);
> > -    if (bs_size < 0) {
> > -        error_setg(errp, "could not get backing file size");
> > -        return;
> > +    if (!params->serial) {
> > +        error_setg(errp, "nvme: serial not configured");
> > +        return 1;
> >      }
> >  
> > -    if (!n->params.serial) {
> > -        error_setg(errp, "serial property not set");
> > -        return;
> > +    if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
> > +        error_setg(errp, "nvme: invalid queue configuration");
> Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."?
> > +        return 1;
> >      }

Fixed.

> > +
> > +    return 0;
> > +}
> > +
> > +static int nvme_init_blk(NvmeCtrl *n, Error **errp)
> > +{
> >      blkconf_blocksizes(&n->conf);
> >      if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
> > -                                       false, errp)) {
> > -        return;
> > +        false, errp)) {
> > +        return 1;
> >      }
> >  
> > -    pci_conf = pci_dev->config;
> > -    pci_conf[PCI_INTERRUPT_PIN] = 1;
> > -    pci_config_set_prog_interface(pci_dev->config, 0x2);
> > -    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
> > -    pcie_endpoint_cap_init(pci_dev, 0x80);
> > +    return 0;
> > +}
> >  
> > +static void nvme_init_state(NvmeCtrl *n)
> > +{
> >      n->num_namespaces = 1;
> >      n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
> 
> Isn't that wrong?
> First 4K of mmio (0x1000) is the registers, and that is followed by the doorbells,
> and each doorbell takes 8 bytes (assuming regular doorbell stride).
> so n->params.num_queues + 1 should be total number of queues, thus the 0x1004 should be 0x1000 IMHO.
> I might miss some rounding magic here though.
> 

Yeah. I think you are right. It all becomes slightly more fishy due to
the num_queues device parameter being 1's based and accounts for the
admin queue pair.

But in get/set features, the value has to be 0's based and only account
for the I/O queues, so we need to subtract 2 from the value. It's
confusing all around.

Since the admin queue pair isn't really optional I think it would be
better that we introduce a new max_ioqpairs parameter that is 1's
based, counts the number of pairs and obviously only accounts for the I/O
queues.

I guess we need to keep the num_queues parameter around for
compatibility.

The doorbells are only 4 bytes btw, but the calculation still looks
wrong. With a max_ioqpairs parameter in place, the reg_size should be

    pow2ceil(0x1008 + 2 * (n->params.max_ioqpairs) * 4)

Right? That's 0x1000 for the core registers, 8 bytes for the sq/cq
doorbells for the admin queue pair, and then room for the I/O queue
pairs.

I added a patch for this in v6.

> > -    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
> > -
> >      n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
> >      n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
> >      n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
> > +}
> >  
> > -    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
> > -                          "nvme", n->reg_size);
> > +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
> > +{
> > +    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
> It would be nice to have #define for CMB bar number

Added.

> > +    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> > +
> > +    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> > +    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> > +    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> > +    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> > +    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> > +    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
> > +    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> > +
> > +    n->cmbloc = n->bar.cmbloc;
> > +    n->cmbsz = n->bar.cmbsz;
> > +
> > +    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > +    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> > +                            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > +    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
> Same here although since you read it here from the controller register,
> then maybe leave it as is. I prefer though for this kind of thing
> to have a #define and use it everywhere. 
> 

Done.

> > +        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> > +        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> > +}
> > +
> > +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
> > +{
> > +    uint8_t *pci_conf = pci_dev->config;
> > +
> > +    pci_conf[PCI_INTERRUPT_PIN] = 1;
> > +    pci_config_set_prog_interface(pci_conf, 0x2);
> Nitpick: How about adding some #define for that as well?
> (I know that this code is copied as is but still)

Yeah. A PCI_PI_NVME or something would be nice. But this should probably
go to some pci related header file? Any idea where that would fit?

> > +    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> > +    pci_config_set_device_id(pci_conf, 0x5845);
> > +    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
> > +    pcie_endpoint_cap_init(pci_dev, 0x80);
> > +
> > +    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
> > +        n->reg_size);
> 
> Code on split lines should start at column right after the '('
> Now its my turn to notice this - our checkpatch.pl doesn't check this,
> and I can't explain how often I am getting burnt on this myself.
> 
> There are *lot* of these issues, I pointed out some of them but you should
> check all the patches for this.
> 

I fixed all that :)

> 
> >      pci_register_bar(pci_dev, 0,
> >          PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
> >          &n->iomem);
> Split line alignment issue here as well.
> >      msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
> >  
> > +    if (n->params.cmb_size_mb) {
> > +        nvme_init_cmb(n, pci_dev);
> > +    }
> > +}
> > +
> > +static void nvme_init_ctrl(NvmeCtrl *n)
> > +{
> > +    NvmeIdCtrl *id = &n->id_ctrl;
> > +    NvmeParams *params = &n->params;
> > +    uint8_t *pci_conf = n->parent_obj.config;
> > +
> >      id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
> >      id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
> >      strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
> >      strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
> > -    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
> > +    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
> >      id->rab = 6;
> >      id->ieee[0] = 0x00;
> >      id->ieee[1] = 0x02;
> > @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> >  
> >      n->bar.vs = NVME_SPEC_VER;
> >      n->bar.intmc = n->bar.intms = 0;
> > +}
> >  
> > -    if (n->params.cmb_size_mb) {
> > +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> > +{
> > +    int64_t bs_size;
> > +    NvmeIdNs *id_ns = &ns->id_ns;
> >  
> > -        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
> > -        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> > +    bs_size = blk_getlength(n->conf.blk);
> > +    if (bs_size < 0) {
> > +        error_setg_errno(errp, -bs_size, "blk_getlength");
> > +        return 1;
> > +    }
> >  
> > -        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> > -        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> > -        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> > -        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> > -        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> > -        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> > -        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> > +    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > +    n->ns_size = bs_size;
> >  
> > -        n->cmbloc = n->bar.cmbloc;
> > -        n->cmbsz = n->bar.cmbsz;
> > +    id_ns->ncap = id_ns->nuse = id_ns->nsze =
> > +        cpu_to_le64(nvme_ns_nlbas(n, ns));
> I myself don't know how to align these splits to be honest.
> I would just split this into multiple statements.
> >  
> > -        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > -        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> > -                              "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > -        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
> > -            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> > -            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> > +    return 0;
> > +}
> >  
> > +static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > +{
> > +    NvmeCtrl *n = NVME(pci_dev);
> > +    Error *local_err = NULL;
> > +    int i;
> > +
> > +    if (nvme_check_constraints(n, &local_err)) {
> > +        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
> Do we need that hint for the end user?

Removed.

> > +        return;
> > +    }
> > +
> > +    nvme_init_state(n);
> > +
> > +    if (nvme_init_blk(n, &local_err)) {
> > +        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
> Same here

Done.


> > +        return;
> >      }
> >  
> >      for (i = 0; i < n->num_namespaces; i++) {
> > -        NvmeNamespace *ns = &n->namespaces[i];
> > -        NvmeIdNs *id_ns = &ns->id_ns;
> > -        id_ns->nsfeat = 0;
> > -        id_ns->nlbaf = 0;
> > -        id_ns->flbas = 0;
> > -        id_ns->mc = 0;
> > -        id_ns->dpc = 0;
> > -        id_ns->dps = 0;
> > -        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > -        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
> > -            cpu_to_le64(n->ns_size >>
> > -                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
> > +        if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
> > +            error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
> And here

Done.


> > +            return;
> > +        }
> >      }
> > +
> > +    nvme_init_pci(n, pci_dev);
> > +    nvme_init_ctrl(n);
> >  }
> >  
> >  static void nvme_exit(PCIDevice *pci_dev)
> > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > index 9957c4a200e2..a867bdfabafd 100644
> > --- a/hw/block/nvme.h
> > +++ b/hw/block/nvme.h
> > @@ -65,6 +65,22 @@ typedef struct NvmeNamespace {
> >      NvmeIdNs        id_ns;
> >  } NvmeNamespace;
> >  
> > +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
> > +{
> Its not common to return a structure in C, usually pointer is returned to
> avoid copying. In this case this doesn't matter that much though.

It's actually gonna be used a lot. So swapped to pointer.

> > +    NvmeIdNs *id_ns = &ns->id_ns;
> > +    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
> > +}
> > +
> > +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
> > +{
> > +    return nvme_ns_lbaf(ns).ds;
> > +}
> > +
> > +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
> > +{
> > +    return 1 << nvme_ns_lbads(ns);
> > +}
> > +
> >  #define TYPE_NVME "nvme"
> >  #define NVME(obj) \
> >          OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> > @@ -101,4 +117,9 @@ typedef struct NvmeCtrl {
> >      NvmeIdCtrl      id_ctrl;
> >  } NvmeCtrl;
> >  
> > +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> > +{
> > +    return n->ns_size >> nvme_ns_lbads(ns);
> > +}
> Unless you need all these functions in the future, this feels like
> it is a bit verbose.
> 

These will be used in various places later.
Maxim Levitsky March 25, 2020, 10:21 a.m. UTC | #3
On Mon, 2020-03-16 at 00:43 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 11:27, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > This patch splits up nvme_realize into multiple individual functions,
> > > each initializing a different subset of the device.
> > > 
> > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > > ---
> > >  hw/block/nvme.c | 175 +++++++++++++++++++++++++++++++-----------------
> > >  hw/block/nvme.h |  21 ++++++
> > >  2 files changed, 133 insertions(+), 63 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index e1810260d40b..81514eaef63a 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -44,6 +44,7 @@
> > >  #include "nvme.h"
> > >  
> > >  #define NVME_SPEC_VER 0x00010201
> > > +#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
> > >  
> > >  #define NVME_GUEST_ERR(trace, fmt, ...) \
> > >      do { \
> > > @@ -1325,67 +1326,106 @@ static const MemoryRegionOps nvme_cmb_ops = {
> > >      },
> > >  };
> > >  
> > > -static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > > +static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
> > >  {
> > > -    NvmeCtrl *n = NVME(pci_dev);
> > > -    NvmeIdCtrl *id = &n->id_ctrl;
> > > -
> > > -    int i;
> > > -    int64_t bs_size;
> > > -    uint8_t *pci_conf;
> > > -
> > > -    if (!n->params.num_queues) {
> > > -        error_setg(errp, "num_queues can't be zero");
> > > -        return;
> > > -    }
> > > +    NvmeParams *params = &n->params;
> > >  
> > >      if (!n->conf.blk) {
> > > -        error_setg(errp, "drive property not set");
> > > -        return;
> > > +        error_setg(errp, "nvme: block backend not configured");
> > > +        return 1;
> > 
> > As a matter of taste, negative values indicate error, and 0 is the success value.
> > In Linux kernel this is even an official rule.
> > >      }
> 
> Fixed.
> 
> > >  
> > > -    bs_size = blk_getlength(n->conf.blk);
> > > -    if (bs_size < 0) {
> > > -        error_setg(errp, "could not get backing file size");
> > > -        return;
> > > +    if (!params->serial) {
> > > +        error_setg(errp, "nvme: serial not configured");
> > > +        return 1;
> > >      }
> > >  
> > > -    if (!n->params.serial) {
> > > -        error_setg(errp, "serial property not set");
> > > -        return;
> > > +    if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
> > > +        error_setg(errp, "nvme: invalid queue configuration");
> > 
> > Maybe something like "nvme: invalid queue count specified, should be between 1 and ..."?
> > > +        return 1;
> > >      }
> 
> Fixed.
Thanks
> 
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int nvme_init_blk(NvmeCtrl *n, Error **errp)
> > > +{
> > >      blkconf_blocksizes(&n->conf);
> > >      if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
> > > -                                       false, errp)) {
> > > -        return;
> > > +        false, errp)) {
> > > +        return 1;
> > >      }
> > >  
> > > -    pci_conf = pci_dev->config;
> > > -    pci_conf[PCI_INTERRUPT_PIN] = 1;
> > > -    pci_config_set_prog_interface(pci_dev->config, 0x2);
> > > -    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
> > > -    pcie_endpoint_cap_init(pci_dev, 0x80);
> > > +    return 0;
> > > +}
> > >  
> > > +static void nvme_init_state(NvmeCtrl *n)
> > > +{
> > >      n->num_namespaces = 1;
> > >      n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
> > 
> > Isn't that wrong?
> > First 4K of mmio (0x1000) is the registers, and that is followed by the doorbells,
> > and each doorbell takes 8 bytes (assuming regular doorbell stride).
> > so n->params.num_queues + 1 should be total number of queues, thus the 0x1004 should be 0x1000 IMHO.
> > I might miss some rounding magic here though.
> > 
> 
> Yeah. I think you are right. It all becomes slightly more fishy due to
> the num_queues device parameter being 1's based and accounts for the
> admin queue pair.
> 
> But in get/set features, the value has to be 0's based and only account
> for the I/O queues, so we need to subtract 2 from the value. It's
> confusing all around.
Yeah, I couldn't agree more on that. The zero-based values have bitten
me a few times while I developed nvme-mdev as well.

> 
> Since the admin queue pair isn't really optional I think it would be
> better that we introduces a new max_ioqpairs parameter that is 1's
> based, counts number of pairs and obviously only accounts for the io
> queues.
> 
> I guess we need to keep the num_queues parameter around for
> compatibility.
> 
> The doorbells are only 4 bytes btw, but the calculation still looks
I don't understand that. Each doorbell is indeed 4 bytes, but they come
in pairs so each doorbell pair is 8 bytes.

BTW, the spec has a so-called doorbell stride, which allows one to artificially increase
each doorbell by a power of two. This was intended for software implementations
(like my nvme-mdev), to make sure that each doorbell takes exactly one cacheline.

I personally wasn't able to notice any measurable difference, but then my nvme-mdev
adds so little overhead that it might not be measurable.
You might want to support this sometime in the future to increase the feature coverage
of this nvme device.

> wrong. With a max_ioqpairs parameter in place, the reg_size should be
> 
>     pow2ceil(0x1008 + 2 * (n->params.max_ioqpairs) * 4)
> 
> Right? Thats 0x1000 for the core registers, 8 bytes for the sq/cq
> doorbells for the admin queue pair, and then room for the i/o queue
> pairs.
Looks great.
BTW, 


> 
> I added a patch for this in v6.
> 
> > > -    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
> > > -
> > >      n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
> > >      n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
> > >      n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
> > > +}
> > >  
> > > -    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
> > > -                          "nvme", n->reg_size);
> > > +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
> > > +{
> > > +    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
> > 
> > It would be nice to have #define for CMB bar number
> 
> Added.
Thanks!
> 
> > > +    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> > > +
> > > +    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> > > +    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> > > +    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> > > +    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> > > +    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> > > +    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
> > > +    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> > > +
> > > +    n->cmbloc = n->bar.cmbloc;
> > > +    n->cmbsz = n->bar.cmbsz;
> > > +
> > > +    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > > +    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> > > +                            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > > +    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
> > 
> > Same here although since you read it here from the controller register,
> > then maybe leave it as is. I prefer though for this kind of thing
> > to have a #define and use it everywhere. 
> > 
> 
> Done.
> 
> > > +        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> > > +        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> > > +}
> > > +
> > > +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
> > > +{
> > > +    uint8_t *pci_conf = pci_dev->config;
> > > +
> > > +    pci_conf[PCI_INTERRUPT_PIN] = 1;
> > > +    pci_config_set_prog_interface(pci_conf, 0x2);
> > 
> > Nitpick: How about adding some #define for that as well?
> > (I know that this code is copied as is but still)
> 
> Yeah. A PCI_PI_NVME or something would be nice. But this should probably
> go to some pci related header file? Any idea where that would fit?

in include/hw/pci/pci_ids.h maybe?

> 
> > > +    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> > > +    pci_config_set_device_id(pci_conf, 0x5845);
> > > +    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
> > > +    pcie_endpoint_cap_init(pci_dev, 0x80);
> > > +
> > > +    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
> > > +        n->reg_size);
> > 
> > > Code on split lines should start at the column right after the '('.
> > > Now it's my turn to notice this - our checkpatch.pl doesn't check this,
> > > and I can't explain how often I am getting burnt on this myself.
> > > 
> > > There are a *lot* of these issues; I pointed out some of them, but you should
> > > check all the patches for this.
> > 
> 
> I fixed all that :)

Thanks, but I bet that some of these remain - speaking from my experience:
like you, I wasn't used to this rule, so I haven't yet adopted it subconsciously,
and our checkpatch.pl doesn't check for it, so I keep on violating this rule in
most patches I send, despite checking each patch a few times.
I'll go over V6, and if I spot this I'll make a note, now that you have fixed most of
these issues.
Thanks again.

> 
> > 
> > >      pci_register_bar(pci_dev, 0,
> > >          PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
> > >          &n->iomem);
> > 
> > Split line alignment issue here as well.
> > >      msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
> > >  
> > > +    if (n->params.cmb_size_mb) {
> > > +        nvme_init_cmb(n, pci_dev);
> > > +    }
> > > +}
> > > +
> > > +static void nvme_init_ctrl(NvmeCtrl *n)
> > > +{
> > > +    NvmeIdCtrl *id = &n->id_ctrl;
> > > +    NvmeParams *params = &n->params;
> > > +    uint8_t *pci_conf = n->parent_obj.config;
> > > +
> > >      id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
> > >      id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
> > >      strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
> > >      strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
> > > -    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
> > > +    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
> > >      id->rab = 6;
> > >      id->ieee[0] = 0x00;
> > >      id->ieee[1] = 0x02;
> > > @@ -1431,46 +1471,55 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > >  
> > >      n->bar.vs = NVME_SPEC_VER;
> > >      n->bar.intmc = n->bar.intms = 0;
> > > +}
> > >  
> > > -    if (n->params.cmb_size_mb) {
> > > +static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> > > +{
> > > +    int64_t bs_size;
> > > +    NvmeIdNs *id_ns = &ns->id_ns;
> > >  
> > > -        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
> > > -        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
> > > +    bs_size = blk_getlength(n->conf.blk);
> > > +    if (bs_size < 0) {
> > > +        error_setg_errno(errp, -bs_size, "blk_getlength");
> > > +        return 1;
> > > +    }
> > >  
> > > -        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> > > -        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> > > -        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
> > > -        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> > > -        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> > > -        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> > > -        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> > > +    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > > +    n->ns_size = bs_size;
> > >  
> > > -        n->cmbloc = n->bar.cmbloc;
> > > -        n->cmbsz = n->bar.cmbsz;
> > > +    id_ns->ncap = id_ns->nuse = id_ns->nsze =
> > > +        cpu_to_le64(nvme_ns_nlbas(n, ns));
> > 
> > I myself don't know how to align these splits to be honest.
> > I would just split this into multiple statements.
> > >  
> > > -        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > > -        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
> > > -                              "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
> > > -        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
> > > -            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
> > > -            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
> > > +    return 0;
> > > +}
> > >  
> > > +static void nvme_realize(PCIDevice *pci_dev, Error **errp)
> > > +{
> > > +    NvmeCtrl *n = NVME(pci_dev);
> > > +    Error *local_err = NULL;
> > > +    int i;
> > > +
> > > +    if (nvme_check_constraints(n, &local_err)) {
> > > +        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
> > 
> > Do we need that hint for the end user?
> 
> Removed.
> 
> > > +        return;
> > > +    }
> > > +
> > > +    nvme_init_state(n);
> > > +
> > > +    if (nvme_init_blk(n, &local_err)) {
> > > +        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
> > 
> > Same here
> 
> Done.
> 
> 
> > > +        return;
> > >      }
> > >  
> > >      for (i = 0; i < n->num_namespaces; i++) {
> > > -        NvmeNamespace *ns = &n->namespaces[i];
> > > -        NvmeIdNs *id_ns = &ns->id_ns;
> > > -        id_ns->nsfeat = 0;
> > > -        id_ns->nlbaf = 0;
> > > -        id_ns->flbas = 0;
> > > -        id_ns->mc = 0;
> > > -        id_ns->dpc = 0;
> > > -        id_ns->dps = 0;
> > > -        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > > -        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
> > > -            cpu_to_le64(n->ns_size >>
> > > -                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
> > > +        if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
> > > +            error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
> > 
> > And here
> 
> Done.
> 
> 
> > > +            return;
> > > +        }
> > >      }
> > > +
> > > +    nvme_init_pci(n, pci_dev);
> > > +    nvme_init_ctrl(n);
> > >  }
> > >  
> > >  static void nvme_exit(PCIDevice *pci_dev)
> > > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > > index 9957c4a200e2..a867bdfabafd 100644
> > > --- a/hw/block/nvme.h
> > > +++ b/hw/block/nvme.h
> > > @@ -65,6 +65,22 @@ typedef struct NvmeNamespace {
> > >      NvmeIdNs        id_ns;
> > >  } NvmeNamespace;
> > >  
> > > +static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
> > > +{
> > 
> > > It's not common to return a structure in C; usually a pointer is returned to
> > > avoid copying. In this case this doesn't matter that much though.
> 
> It's actually gonna be used a lot. So swapped to pointer.
Thanks.

> 
> > > +    NvmeIdNs *id_ns = &ns->id_ns;
> > > +    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
> > > +}
> > > +
> > > +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
> > > +{
> > > +    return nvme_ns_lbaf(ns).ds;
> > > +}
> > > +
> > > +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
> > > +{
> > > +    return 1 << nvme_ns_lbads(ns);
> > > +}
> > > +
> > >  #define TYPE_NVME "nvme"
> > >  #define NVME(obj) \
> > >          OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> > > @@ -101,4 +117,9 @@ typedef struct NvmeCtrl {
> > >      NvmeIdCtrl      id_ctrl;
> > >  } NvmeCtrl;
> > >  
> > > +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
> > > +{
> > > +    return n->ns_size >> nvme_ns_lbads(ns);
> > > +}
> > 
> > Unless you need all these functions in the future, this feels like
> > it is a bit verbose.
> > 
> 
> These will be used in various places later.
OK, then it is all right.

>  
> 

Best regards,
	Maxim Levitsky
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e1810260d40b..81514eaef63a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -44,6 +44,7 @@ 
 #include "nvme.h"
 
 #define NVME_SPEC_VER 0x00010201
+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
 
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
@@ -1325,67 +1326,106 @@  static const MemoryRegionOps nvme_cmb_ops = {
     },
 };
 
-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
 {
-    NvmeCtrl *n = NVME(pci_dev);
-    NvmeIdCtrl *id = &n->id_ctrl;
-
-    int i;
-    int64_t bs_size;
-    uint8_t *pci_conf;
-
-    if (!n->params.num_queues) {
-        error_setg(errp, "num_queues can't be zero");
-        return;
-    }
+    NvmeParams *params = &n->params;
 
     if (!n->conf.blk) {
-        error_setg(errp, "drive property not set");
-        return;
+        error_setg(errp, "nvme: block backend not configured");
+        return 1;
     }
 
-    bs_size = blk_getlength(n->conf.blk);
-    if (bs_size < 0) {
-        error_setg(errp, "could not get backing file size");
-        return;
+    if (!params->serial) {
+        error_setg(errp, "nvme: serial not configured");
+        return 1;
     }
 
-    if (!n->params.serial) {
-        error_setg(errp, "serial property not set");
-        return;
+    if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
+        error_setg(errp, "nvme: invalid queue configuration");
+        return 1;
     }
+
+    return 0;
+}
+
+static int nvme_init_blk(NvmeCtrl *n, Error **errp)
+{
     blkconf_blocksizes(&n->conf);
     if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
-                                       false, errp)) {
-        return;
+        false, errp)) {
+        return 1;
     }
 
-    pci_conf = pci_dev->config;
-    pci_conf[PCI_INTERRUPT_PIN] = 1;
-    pci_config_set_prog_interface(pci_dev->config, 0x2);
-    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
-    pcie_endpoint_cap_init(pci_dev, 0x80);
+    return 0;
+}
 
+static void nvme_init_state(NvmeCtrl *n)
+{
     n->num_namespaces = 1;
     n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
-    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
-
     n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
     n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
     n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+}
 
-    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
-                          "nvme", n->reg_size);
+static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
+    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+
+    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
+    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+
+    n->cmbloc = n->bar.cmbloc;
+    n->cmbsz = n->bar.cmbsz;
+
+    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
+                            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
+        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
+        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+}
+
+static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    uint8_t *pci_conf = pci_dev->config;
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1;
+    pci_config_set_prog_interface(pci_conf, 0x2);
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(pci_conf, 0x5845);
+    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+    pcie_endpoint_cap_init(pci_dev, 0x80);
+
+    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+        n->reg_size);
     pci_register_bar(pci_dev, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
     msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
 
+    if (n->params.cmb_size_mb) {
+        nvme_init_cmb(n, pci_dev);
+    }
+}
+
+static void nvme_init_ctrl(NvmeCtrl *n)
+{
+    NvmeIdCtrl *id = &n->id_ctrl;
+    NvmeParams *params = &n->params;
+    uint8_t *pci_conf = n->parent_obj.config;
+
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
     strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
     strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
-    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
     id->rab = 6;
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
@@ -1431,46 +1471,55 @@  static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 
     n->bar.vs = NVME_SPEC_VER;
     n->bar.intmc = n->bar.intms = 0;
+}
 
-    if (n->params.cmb_size_mb) {
+static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+    int64_t bs_size;
+    NvmeIdNs *id_ns = &ns->id_ns;
 
-        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
-        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+    bs_size = blk_getlength(n->conf.blk);
+    if (bs_size < 0) {
+        error_setg_errno(errp, -bs_size, "blk_getlength");
+        return 1;
+    }
 
-        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+    n->ns_size = bs_size;
 
-        n->cmbloc = n->bar.cmbloc;
-        n->cmbsz = n->bar.cmbsz;
+    id_ns->ncap = id_ns->nuse = id_ns->nsze =
+        cpu_to_le64(nvme_ns_nlbas(n, ns));
 
-        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
-                              "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
-            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
-            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+    return 0;
+}
 
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+    NvmeCtrl *n = NVME(pci_dev);
+    Error *local_err = NULL;
+    int i;
+
+    if (nvme_check_constraints(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
+        return;
+    }
+
+    nvme_init_state(n);
+
+    if (nvme_init_blk(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
+        return;
     }
 
     for (i = 0; i < n->num_namespaces; i++) {
-        NvmeNamespace *ns = &n->namespaces[i];
-        NvmeIdNs *id_ns = &ns->id_ns;
-        id_ns->nsfeat = 0;
-        id_ns->nlbaf = 0;
-        id_ns->flbas = 0;
-        id_ns->mc = 0;
-        id_ns->dpc = 0;
-        id_ns->dps = 0;
-        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
-        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
-            cpu_to_le64(n->ns_size >>
-                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
+        if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
+            error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
+            return;
+        }
     }
+
+    nvme_init_pci(n, pci_dev);
+    nvme_init_ctrl(n);
 }
 
 static void nvme_exit(PCIDevice *pci_dev)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 9957c4a200e2..a867bdfabafd 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -65,6 +65,22 @@  typedef struct NvmeNamespace {
     NvmeIdNs        id_ns;
 } NvmeNamespace;
 
+static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
+{
+    NvmeIdNs *id_ns = &ns->id_ns;
+    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
+}
+
+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+    return nvme_ns_lbaf(ns).ds;
+}
+
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+    return 1 << nvme_ns_lbads(ns);
+}
+
 #define TYPE_NVME "nvme"
 #define NVME(obj) \
         OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
@@ -101,4 +117,9 @@  typedef struct NvmeCtrl {
     NvmeIdCtrl      id_ctrl;
 } NvmeCtrl;
 
+static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    return n->ns_size >> nvme_ns_lbads(ns);
+}
+
 #endif /* HW_NVME_H */