diff mbox series

[v5,12/26] nvme: add missing mandatory features

Message ID 20200204095208.269131-13-k.jensen@samsung.com (mailing list archive)
State New, archived
Headers show
Series nvme: support NVMe v1.3d, SGLs and multiple namespaces | expand

Commit Message

Klaus Jensen Feb. 4, 2020, 9:51 a.m. UTC
Add support for returning a resonable response to Get/Set Features of
mandatory features.

Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
---
 hw/block/nvme.c       | 57 ++++++++++++++++++++++++++++++++++++++++---
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  3 ++-
 3 files changed, 58 insertions(+), 4 deletions(-)

Comments

Maxim Levitsky Feb. 12, 2020, 10:27 a.m. UTC | #1
On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Add support for returning a resonable response to Get/Set Features of
> mandatory features.
> 
> Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> ---
>  hw/block/nvme.c       | 57 ++++++++++++++++++++++++++++++++++++++++---
>  hw/block/trace-events |  2 ++
>  include/block/nvme.h  |  3 ++-
>  3 files changed, 58 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a186d95df020..3267ee2de47a 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1008,7 +1008,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>      uint32_t result;
>  
> +    trace_nvme_dev_getfeat(nvme_cid(req), dw10);
> +
>      switch (dw10) {
> +    case NVME_ARBITRATION:
> +        result = cpu_to_le32(n->features.arbitration);
> +        break;
> +    case NVME_POWER_MANAGEMENT:
> +        result = cpu_to_le32(n->features.power_mgmt);
> +        break;
>      case NVME_TEMPERATURE_THRESHOLD:
>          result = 0;
>  
> @@ -1029,6 +1037,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>              break;
>          }
>  
> +        break;
> +    case NVME_ERROR_RECOVERY:
> +        result = cpu_to_le32(n->features.err_rec);
>          break;
>      case NVME_VOLATILE_WRITE_CACHE:
>          result = blk_enable_write_cache(n->conf.blk);

This is existing code but still like to point out that endianess conversion is missing.
Also we need to think if we need to do some flush if the write cache is disabled.
I don't know yet that area well enough.

> @@ -1041,6 +1052,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          break;
>      case NVME_TIMESTAMP:
>          return nvme_get_feature_timestamp(n, cmd);
> +    case NVME_INTERRUPT_COALESCING:
> +        result = cpu_to_le32(n->features.int_coalescing);
> +        break;
> +    case NVME_INTERRUPT_VECTOR_CONF:
> +        if ((dw11 & 0xffff) > n->params.num_queues) {
Looks like it should be >= since interrupt vector is not zero based.
> +            return NVME_INVALID_FIELD | NVME_DNR;
> +        }
> +
> +        result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
> +        break;
> +    case NVME_WRITE_ATOMICITY:
> +        result = cpu_to_le32(n->features.write_atomicity);
> +        break;
>      case NVME_ASYNCHRONOUS_EVENT_CONF:
>          result = cpu_to_le32(n->features.async_config);
>          break;
> @@ -1076,6 +1100,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>  
> +    trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11);
> +
>      switch (dw10) {
>      case NVME_TEMPERATURE_THRESHOLD:
>          if (NVME_TEMP_TMPSEL(dw11)) {
> @@ -1116,6 +1142,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      case NVME_ASYNCHRONOUS_EVENT_CONF:
>          n->features.async_config = dw11;
>          break;
> +    case NVME_ARBITRATION:
> +    case NVME_POWER_MANAGEMENT:
> +    case NVME_ERROR_RECOVERY:
> +    case NVME_INTERRUPT_COALESCING:
> +    case NVME_INTERRUPT_VECTOR_CONF:
> +    case NVME_WRITE_ATOMICITY:
> +        return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
>      default:
>          trace_nvme_dev_err_invalid_setfeat(dw10);
>          return NVME_INVALID_FIELD | NVME_DNR;
> @@ -1689,6 +1722,21 @@ static void nvme_init_state(NvmeCtrl *n)
>      n->temperature = NVME_TEMPERATURE;
>      n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
>      n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +
> +    /*
> +     * There is no limit on the number of commands that the controller may
> +     * launch at one time from a particular Submission Queue.
> +     */
> +    n->features.arbitration = 0x7;
A nice #define in nvme.h stating that 0x7 means no burst limit would be nice.

> +
> +    n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
> +        sizeof(*n->features.int_vector_config));
> +
> +    /* disable coalescing (not supported) */
> +    for (int i = 0; i < n->params.num_queues; i++) {
> +        n->features.int_vector_config[i] = i | (1 << 16);
Same here
> +    }
> +
>      n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
>  }
>  
> @@ -1782,15 +1830,17 @@ static void nvme_init_ctrl(NvmeCtrl *n)
>      id->nn = cpu_to_le32(n->num_namespaces);
>      id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
>  
> +
> +    if (blk_enable_write_cache(n->conf.blk)) {
> +        id->vwc = 1;
> +    }
> +
>      strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
>      pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
>  
>      id->psd[0].mp = cpu_to_le16(0x9c4);
>      id->psd[0].enlat = cpu_to_le32(0x10);
>      id->psd[0].exlat = cpu_to_le32(0x4);
> -    if (blk_enable_write_cache(n->conf.blk)) {
> -        id->vwc = 1;
> -    }
>  
>      n->bar.cap = 0;
>      NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> @@ -1861,6 +1911,7 @@ static void nvme_exit(PCIDevice *pci_dev)
>      g_free(n->cq);
>      g_free(n->sq);
>      g_free(n->aer_reqs);
> +    g_free(n->features.int_vector_config);
>  
>      if (n->params.cmb_size_mb) {
>          g_free(n->cmbuf);
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 3952c36774cf..4cf39961989d 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -41,6 +41,8 @@ nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
>  nvme_dev_identify_ctrl(void) "identify controller"
>  nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
>  nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
> +nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
> +nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" fid 0x%"PRIx32" val 0x%"PRIx32""
>  nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
>  nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d"
>  nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index a24be047a311..09419ed499d0 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -445,7 +445,8 @@ enum NvmeStatusCodes {
>      NVME_FW_REQ_RESET           = 0x010b,
>      NVME_INVALID_QUEUE_DEL      = 0x010c,
>      NVME_FID_NOT_SAVEABLE       = 0x010d,
> -    NVME_FID_NOT_NSID_SPEC      = 0x010f,
> +    NVME_FEAT_NOT_CHANGABLE     = 0x010e,
> +    NVME_FEAT_NOT_NSID_SPEC     = 0x010f,
>      NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
>      NVME_CONFLICTING_ATTRS      = 0x0180,
>      NVME_INVALID_PROT_INFO      = 0x0181,

Best regards,
	Maxim Levitsky
Klaus Jensen March 16, 2020, 7:47 a.m. UTC | #2
On Feb 12 12:27, Maxim Levitsky wrote:
> On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > Add support for returning a resonable response to Get/Set Features of
> > mandatory features.
> > 
> > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > ---
> >  hw/block/nvme.c       | 57 ++++++++++++++++++++++++++++++++++++++++---
> >  hw/block/trace-events |  2 ++
> >  include/block/nvme.h  |  3 ++-
> >  3 files changed, 58 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index a186d95df020..3267ee2de47a 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -1008,7 +1008,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> >      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> >      uint32_t result;
> >  
> > +    trace_nvme_dev_getfeat(nvme_cid(req), dw10);
> > +
> >      switch (dw10) {
> > +    case NVME_ARBITRATION:
> > +        result = cpu_to_le32(n->features.arbitration);
> > +        break;
> > +    case NVME_POWER_MANAGEMENT:
> > +        result = cpu_to_le32(n->features.power_mgmt);
> > +        break;
> >      case NVME_TEMPERATURE_THRESHOLD:
> >          result = 0;
> >  
> > @@ -1029,6 +1037,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> >              break;
> >          }
> >  
> > +        break;
> > +    case NVME_ERROR_RECOVERY:
> > +        result = cpu_to_le32(n->features.err_rec);
> >          break;
> >      case NVME_VOLATILE_WRITE_CACHE:
> >          result = blk_enable_write_cache(n->conf.blk);
> 
> This is existing code but still like to point out that endianess conversion is missing.

Fixed.

> Also we need to think if we need to do some flush if the write cache is disabled.
> I don't know yet that area well enough.
> 

Looking at the block layer code it just sets a flag when disabling, but
subsequent requests will have BDRV_REQ_FUA set. So to make sure that
stuff in the cache is flushed, let's do a flush.

> > @@ -1041,6 +1052,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> >          break;
> >      case NVME_TIMESTAMP:
> >          return nvme_get_feature_timestamp(n, cmd);
> > +    case NVME_INTERRUPT_COALESCING:
> > +        result = cpu_to_le32(n->features.int_coalescing);
> > +        break;
> > +    case NVME_INTERRUPT_VECTOR_CONF:
> > +        if ((dw11 & 0xffff) > n->params.num_queues) {
> Looks like it should be >= since interrupt vector is not zero based.

Fixed in other patch.

> > +            return NVME_INVALID_FIELD | NVME_DNR;
> > +        }
> > +
> > +        result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
> > +        break;
> > +    case NVME_WRITE_ATOMICITY:
> > +        result = cpu_to_le32(n->features.write_atomicity);
> > +        break;
> >      case NVME_ASYNCHRONOUS_EVENT_CONF:
> >          result = cpu_to_le32(n->features.async_config);
> >          break;
> > @@ -1076,6 +1100,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> >      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> >      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> >  
> > +    trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11);
> > +
> >      switch (dw10) {
> >      case NVME_TEMPERATURE_THRESHOLD:
> >          if (NVME_TEMP_TMPSEL(dw11)) {
> > @@ -1116,6 +1142,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> >      case NVME_ASYNCHRONOUS_EVENT_CONF:
> >          n->features.async_config = dw11;
> >          break;
> > +    case NVME_ARBITRATION:
> > +    case NVME_POWER_MANAGEMENT:
> > +    case NVME_ERROR_RECOVERY:
> > +    case NVME_INTERRUPT_COALESCING:
> > +    case NVME_INTERRUPT_VECTOR_CONF:
> > +    case NVME_WRITE_ATOMICITY:
> > +        return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
> >      default:
> >          trace_nvme_dev_err_invalid_setfeat(dw10);
> >          return NVME_INVALID_FIELD | NVME_DNR;
> > @@ -1689,6 +1722,21 @@ static void nvme_init_state(NvmeCtrl *n)
> >      n->temperature = NVME_TEMPERATURE;
> >      n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
> >      n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > +
> > +    /*
> > +     * There is no limit on the number of commands that the controller may
> > +     * launch at one time from a particular Submission Queue.
> > +     */
> > +    n->features.arbitration = 0x7;
> A nice #define in nvme.h stating that 0x7 means no burst limit would be nice.
> 

Done.

> > +
> > +    n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
> > +        sizeof(*n->features.int_vector_config));
> > +
> > +    /* disable coalescing (not supported) */
> > +    for (int i = 0; i < n->params.num_queues; i++) {
> > +        n->features.int_vector_config[i] = i | (1 << 16);
> Same here

Done.

> > +    }
> > +
> >      n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
> >  }
> >  
> > @@ -1782,15 +1830,17 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> >      id->nn = cpu_to_le32(n->num_namespaces);
> >      id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
> >  
> > +
> > +    if (blk_enable_write_cache(n->conf.blk)) {
> > +        id->vwc = 1;
> > +    }
> > +
> >      strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
> >      pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
> >  
> >      id->psd[0].mp = cpu_to_le16(0x9c4);
> >      id->psd[0].enlat = cpu_to_le32(0x10);
> >      id->psd[0].exlat = cpu_to_le32(0x4);
> > -    if (blk_enable_write_cache(n->conf.blk)) {
> > -        id->vwc = 1;
> > -    }
> >  
> >      n->bar.cap = 0;
> >      NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> > @@ -1861,6 +1911,7 @@ static void nvme_exit(PCIDevice *pci_dev)
> >      g_free(n->cq);
> >      g_free(n->sq);
> >      g_free(n->aer_reqs);
> > +    g_free(n->features.int_vector_config);
> >  
> >      if (n->params.cmb_size_mb) {
> >          g_free(n->cmbuf);
> > diff --git a/hw/block/trace-events b/hw/block/trace-events
> > index 3952c36774cf..4cf39961989d 100644
> > --- a/hw/block/trace-events
> > +++ b/hw/block/trace-events
> > @@ -41,6 +41,8 @@ nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
> >  nvme_dev_identify_ctrl(void) "identify controller"
> >  nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
> >  nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
> > +nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
> > +nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" fid 0x%"PRIx32" val 0x%"PRIx32""
> >  nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
> >  nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d"
> >  nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index a24be047a311..09419ed499d0 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -445,7 +445,8 @@ enum NvmeStatusCodes {
> >      NVME_FW_REQ_RESET           = 0x010b,
> >      NVME_INVALID_QUEUE_DEL      = 0x010c,
> >      NVME_FID_NOT_SAVEABLE       = 0x010d,
> > -    NVME_FID_NOT_NSID_SPEC      = 0x010f,
> > +    NVME_FEAT_NOT_CHANGABLE     = 0x010e,
> > +    NVME_FEAT_NOT_NSID_SPEC     = 0x010f,
> >      NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
> >      NVME_CONFLICTING_ATTRS      = 0x0180,
> >      NVME_INVALID_PROT_INFO      = 0x0181,
> 
> Best regards,
> 	Maxim Levitsky
>
Maxim Levitsky March 25, 2020, 10:22 a.m. UTC | #3
On Mon, 2020-03-16 at 00:47 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 12:27, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Add support for returning a resonable response to Get/Set Features of
> > > mandatory features.
> > > 
> > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > > ---
> > >  hw/block/nvme.c       | 57 ++++++++++++++++++++++++++++++++++++++++---
> > >  hw/block/trace-events |  2 ++
> > >  include/block/nvme.h  |  3 ++-
> > >  3 files changed, 58 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index a186d95df020..3267ee2de47a 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -1008,7 +1008,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > >      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >      uint32_t result;
> > >  
> > > +    trace_nvme_dev_getfeat(nvme_cid(req), dw10);
> > > +
> > >      switch (dw10) {
> > > +    case NVME_ARBITRATION:
> > > +        result = cpu_to_le32(n->features.arbitration);
> > > +        break;
> > > +    case NVME_POWER_MANAGEMENT:
> > > +        result = cpu_to_le32(n->features.power_mgmt);
> > > +        break;
> > >      case NVME_TEMPERATURE_THRESHOLD:
> > >          result = 0;
> > >  
> > > @@ -1029,6 +1037,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > >              break;
> > >          }
> > >  
> > > +        break;
> > > +    case NVME_ERROR_RECOVERY:
> > > +        result = cpu_to_le32(n->features.err_rec);
> > >          break;
> > >      case NVME_VOLATILE_WRITE_CACHE:
> > >          result = blk_enable_write_cache(n->conf.blk);
> > 
> > This is existing code but still like to point out that endianess conversion is missing.
> 
> Fixed.
> 
> > Also we need to think if we need to do some flush if the write cache is disabled.
> > I don't know yet that area well enough.
> > 
> 
> Looking at the block layer code it just sets a flag when disabling, but
> subsequent requests will have BDRV_REQ_FUA set. So to make sure that
> stuff in the cache is flushed, let's do a flush.
Good to know!

> 
> > > @@ -1041,6 +1052,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > >          break;
> > >      case NVME_TIMESTAMP:
> > >          return nvme_get_feature_timestamp(n, cmd);
> > > +    case NVME_INTERRUPT_COALESCING:
> > > +        result = cpu_to_le32(n->features.int_coalescing);
> > > +        break;
> > > +    case NVME_INTERRUPT_VECTOR_CONF:
> > > +        if ((dw11 & 0xffff) > n->params.num_queues) {
> > 
> > Looks like it should be >= since interrupt vector is not zero based.
> 
> Fixed in other patch.
> 
> > > +            return NVME_INVALID_FIELD | NVME_DNR;
> > > +        }
> > > +
> > > +        result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
> > > +        break;
> > > +    case NVME_WRITE_ATOMICITY:
> > > +        result = cpu_to_le32(n->features.write_atomicity);
> > > +        break;
> > >      case NVME_ASYNCHRONOUS_EVENT_CONF:
> > >          result = cpu_to_le32(n->features.async_config);
> > >          break;
> > > @@ -1076,6 +1100,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > >      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > >      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > >  
> > > +    trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11);
> > > +
> > >      switch (dw10) {
> > >      case NVME_TEMPERATURE_THRESHOLD:
> > >          if (NVME_TEMP_TMPSEL(dw11)) {
> > > @@ -1116,6 +1142,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > >      case NVME_ASYNCHRONOUS_EVENT_CONF:
> > >          n->features.async_config = dw11;
> > >          break;
> > > +    case NVME_ARBITRATION:
> > > +    case NVME_POWER_MANAGEMENT:
> > > +    case NVME_ERROR_RECOVERY:
> > > +    case NVME_INTERRUPT_COALESCING:
> > > +    case NVME_INTERRUPT_VECTOR_CONF:
> > > +    case NVME_WRITE_ATOMICITY:
> > > +        return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
> > >      default:
> > >          trace_nvme_dev_err_invalid_setfeat(dw10);
> > >          return NVME_INVALID_FIELD | NVME_DNR;
> > > @@ -1689,6 +1722,21 @@ static void nvme_init_state(NvmeCtrl *n)
> > >      n->temperature = NVME_TEMPERATURE;
> > >      n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
> > >      n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > +
> > > +    /*
> > > +     * There is no limit on the number of commands that the controller may
> > > +     * launch at one time from a particular Submission Queue.
> > > +     */
> > > +    n->features.arbitration = 0x7;
> > 
> > A nice #define in nvme.h stating that 0x7 means no burst limit would be nice.
> > 
> 
> Done.
> 
> > > +
> > > +    n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
> > > +        sizeof(*n->features.int_vector_config));
> > > +
> > > +    /* disable coalescing (not supported) */
> > > +    for (int i = 0; i < n->params.num_queues; i++) {
> > > +        n->features.int_vector_config[i] = i | (1 << 16);
> > 
> > Same here
> 
> Done.
> 
> > > +    }
> > > +
> > >      n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
> > >  }
> > >  
> > > @@ -1782,15 +1830,17 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> > >      id->nn = cpu_to_le32(n->num_namespaces);
> > >      id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
> > >  
> > > +
> > > +    if (blk_enable_write_cache(n->conf.blk)) {
> > > +        id->vwc = 1;
> > > +    }
> > > +
> > >      strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
> > >      pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
> > >  
> > >      id->psd[0].mp = cpu_to_le16(0x9c4);
> > >      id->psd[0].enlat = cpu_to_le32(0x10);
> > >      id->psd[0].exlat = cpu_to_le32(0x4);
> > > -    if (blk_enable_write_cache(n->conf.blk)) {
> > > -        id->vwc = 1;
> > > -    }
> > >  
> > >      n->bar.cap = 0;
> > >      NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> > > @@ -1861,6 +1911,7 @@ static void nvme_exit(PCIDevice *pci_dev)
> > >      g_free(n->cq);
> > >      g_free(n->sq);
> > >      g_free(n->aer_reqs);
> > > +    g_free(n->features.int_vector_config);
> > >  
> > >      if (n->params.cmb_size_mb) {
> > >          g_free(n->cmbuf);
> > > diff --git a/hw/block/trace-events b/hw/block/trace-events
> > > index 3952c36774cf..4cf39961989d 100644
> > > --- a/hw/block/trace-events
> > > +++ b/hw/block/trace-events
> > > @@ -41,6 +41,8 @@ nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
> > >  nvme_dev_identify_ctrl(void) "identify controller"
> > >  nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
> > >  nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
> > > +nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
> > > +nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" fid 0x%"PRIx32" val 0x%"PRIx32""
> > >  nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
> > >  nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d"
> > >  nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> > > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > > index a24be047a311..09419ed499d0 100644
> > > --- a/include/block/nvme.h
> > > +++ b/include/block/nvme.h
> > > @@ -445,7 +445,8 @@ enum NvmeStatusCodes {
> > >      NVME_FW_REQ_RESET           = 0x010b,
> > >      NVME_INVALID_QUEUE_DEL      = 0x010c,
> > >      NVME_FID_NOT_SAVEABLE       = 0x010d,
> > > -    NVME_FID_NOT_NSID_SPEC      = 0x010f,
> > > +    NVME_FEAT_NOT_CHANGABLE     = 0x010e,
> > > +    NVME_FEAT_NOT_NSID_SPEC     = 0x010f,
> > >      NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
> > >      NVME_CONFLICTING_ATTRS      = 0x0180,
> > >      NVME_INVALID_PROT_INFO      = 0x0181,
> > 
> > Best regards,
> > 	Maxim Levitsky
> > 
> 
> 

Thanks,
Best regards,
	Maxim Levitsky
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a186d95df020..3267ee2de47a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1008,7 +1008,15 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t result;
 
+    trace_nvme_dev_getfeat(nvme_cid(req), dw10);
+
     switch (dw10) {
+    case NVME_ARBITRATION:
+        result = cpu_to_le32(n->features.arbitration);
+        break;
+    case NVME_POWER_MANAGEMENT:
+        result = cpu_to_le32(n->features.power_mgmt);
+        break;
     case NVME_TEMPERATURE_THRESHOLD:
         result = 0;
 
@@ -1029,6 +1037,9 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
             break;
         }
 
+        break;
+    case NVME_ERROR_RECOVERY:
+        result = cpu_to_le32(n->features.err_rec);
         break;
     case NVME_VOLATILE_WRITE_CACHE:
         result = blk_enable_write_cache(n->conf.blk);
@@ -1041,6 +1052,19 @@  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         break;
     case NVME_TIMESTAMP:
         return nvme_get_feature_timestamp(n, cmd);
+    case NVME_INTERRUPT_COALESCING:
+        result = cpu_to_le32(n->features.int_coalescing);
+        break;
+    case NVME_INTERRUPT_VECTOR_CONF:
+        if ((dw11 & 0xffff) > n->params.num_queues) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
+        break;
+    case NVME_WRITE_ATOMICITY:
+        result = cpu_to_le32(n->features.write_atomicity);
+        break;
     case NVME_ASYNCHRONOUS_EVENT_CONF:
         result = cpu_to_le32(n->features.async_config);
         break;
@@ -1076,6 +1100,8 @@  static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
 
+    trace_nvme_dev_setfeat(nvme_cid(req), dw10, dw11);
+
     switch (dw10) {
     case NVME_TEMPERATURE_THRESHOLD:
         if (NVME_TEMP_TMPSEL(dw11)) {
@@ -1116,6 +1142,13 @@  static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_ASYNCHRONOUS_EVENT_CONF:
         n->features.async_config = dw11;
         break;
+    case NVME_ARBITRATION:
+    case NVME_POWER_MANAGEMENT:
+    case NVME_ERROR_RECOVERY:
+    case NVME_INTERRUPT_COALESCING:
+    case NVME_INTERRUPT_VECTOR_CONF:
+    case NVME_WRITE_ATOMICITY:
+        return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
     default:
         trace_nvme_dev_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -1689,6 +1722,21 @@  static void nvme_init_state(NvmeCtrl *n)
     n->temperature = NVME_TEMPERATURE;
     n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
     n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+
+    /*
+     * There is no limit on the number of commands that the controller may
+     * launch at one time from a particular Submission Queue.
+     */
+    n->features.arbitration = 0x7;
+
+    n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
+        sizeof(*n->features.int_vector_config));
+
+    /* disable coalescing (not supported) */
+    for (int i = 0; i < n->params.num_queues; i++) {
+        n->features.int_vector_config[i] = i | (1 << 16);
+    }
+
     n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 }
 
@@ -1782,15 +1830,17 @@  static void nvme_init_ctrl(NvmeCtrl *n)
     id->nn = cpu_to_le32(n->num_namespaces);
     id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
 
+
+    if (blk_enable_write_cache(n->conf.blk)) {
+        id->vwc = 1;
+    }
+
     strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
     pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
 
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
-    if (blk_enable_write_cache(n->conf.blk)) {
-        id->vwc = 1;
-    }
 
     n->bar.cap = 0;
     NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
@@ -1861,6 +1911,7 @@  static void nvme_exit(PCIDevice *pci_dev)
     g_free(n->cq);
     g_free(n->sq);
     g_free(n->aer_reqs);
+    g_free(n->features.int_vector_config);
 
     if (n->params.cmb_size_mb) {
         g_free(n->cmbuf);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 3952c36774cf..4cf39961989d 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -41,6 +41,8 @@  nvme_dev_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
 nvme_dev_identify_ctrl(void) "identify controller"
 nvme_dev_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
 nvme_dev_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_dev_getfeat(uint16_t cid, uint32_t fid) "cid %"PRIu16" fid 0x%"PRIx32""
+nvme_dev_setfeat(uint16_t cid, uint32_t fid, uint32_t val) "cid %"PRIu16" fid 0x%"PRIx32" val 0x%"PRIx32""
 nvme_dev_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
 nvme_dev_getfeat_numq(int result) "get feature number of queues, result=%d"
 nvme_dev_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
diff --git a/include/block/nvme.h b/include/block/nvme.h
index a24be047a311..09419ed499d0 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -445,7 +445,8 @@  enum NvmeStatusCodes {
     NVME_FW_REQ_RESET           = 0x010b,
     NVME_INVALID_QUEUE_DEL      = 0x010c,
     NVME_FID_NOT_SAVEABLE       = 0x010d,
-    NVME_FID_NOT_NSID_SPEC      = 0x010f,
+    NVME_FEAT_NOT_CHANGABLE     = 0x010e,
+    NVME_FEAT_NOT_NSID_SPEC     = 0x010f,
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,