
[v3,07/18] hw/block/nvme: add support for the get log page command

Message ID 20200706061303.246057-8-its@irrelevant.dk (mailing list archive)
State New, archived
Series hw/block/nvme: bump to v1.3

Commit Message

Klaus Jensen July 6, 2020, 6:12 a.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

Add support for the Get Log Page command and basic implementations of
the mandatory Error Information, SMART / Health Information and Firmware
Slot Information log pages.

In violation of the specification, the SMART / Health Information log
page does not persist information over the lifetime of the controller
because the device has no place to store such persistent state.

Note that the LPA field in the Identify Controller data structure
intentionally has bit 0 cleared because there is no namespace specific
information in the SMART / Health information log page.

Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
Section 5.14 ("Get Log Page command").

Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Keith Busch <kbusch@kernel.org>
---
 hw/block/nvme.c       | 140 +++++++++++++++++++++++++++++++++++++++++-
 hw/block/nvme.h       |   2 +
 hw/block/trace-events |   2 +
 include/block/nvme.h  |   8 ++-
 4 files changed, 149 insertions(+), 3 deletions(-)

Comments

Dmitry Fomichev July 8, 2020, 7:22 p.m. UTC | #1
Looks good,

Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>

On Mon, 2020-07-06 at 08:12 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add support for the Get Log Page command and basic implementations of
> the mandatory Error Information, SMART / Health Information and Firmware
> Slot Information log pages.
> 
> In violation of the specification, the SMART / Health Information log
> page does not persist information over the lifetime of the controller
> because the device has no place to store such persistent state.
> 
> Note that the LPA field in the Identify Controller data structure
> intentionally has bit 0 cleared because there is no namespace specific
> information in the SMART / Health information log page.
> 
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.14 ("Get Log Page command").
> 
> Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Acked-by: Keith Busch <kbusch@kernel.org>
> ---
>  hw/block/nvme.c       | 140 +++++++++++++++++++++++++++++++++++++++++-
>  hw/block/nvme.h       |   2 +
>  hw/block/trace-events |   2 +
>  include/block/nvme.h  |   8 ++-
>  4 files changed, 149 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b6bc75eb61a2..7cb3787638f6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -606,6 +606,140 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
> +{
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +
> +    uint32_t trans_len;
> +    time_t current_ms;
> +    uint64_t units_read = 0, units_written = 0;
> +    uint64_t read_commands = 0, write_commands = 0;
> +    NvmeSmartLog smart;
> +    BlockAcctStats *s;
> +
> +    if (nsid && nsid != 0xffffffff) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    s = blk_get_stats(n->conf.blk);
> +
> +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> +
> +    if (off > sizeof(smart)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trans_len = MIN(sizeof(smart) - off, buf_len);
> +
> +    memset(&smart, 0x0, sizeof(smart));
> +
> +    smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> +    smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> +    smart.host_read_commands[0] = cpu_to_le64(read_commands);
> +    smart.host_write_commands[0] = cpu_to_le64(write_commands);
> +
> +    smart.temperature = cpu_to_le16(n->temperature);
> +
> +    if ((n->temperature >= n->features.temp_thresh_hi) ||
> +        (n->temperature <= n->features.temp_thresh_low)) {
> +        smart.critical_warning |= NVME_SMART_TEMPERATURE;
> +    }
> +
> +    current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +    smart.power_on_hours[0] =
> +        cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> +                             prp2);
> +}
> +
> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                 uint64_t off, NvmeRequest *req)
> +{
> +    uint32_t trans_len;
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    NvmeFwSlotInfoLog fw_log = {
> +        .afi = 0x1,
> +    };
> +
> +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> +
> +    if (off > sizeof(fw_log)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> +                             prp2);
> +}
> +
> +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
> +{
> +    uint32_t trans_len;
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    NvmeErrorLog errlog;
> +
> +    if (off > sizeof(errlog)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    memset(&errlog, 0x0, sizeof(errlog));
> +
> +    trans_len = MIN(sizeof(errlog) - off, buf_len);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
> +}
> +
> +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> +    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
> +    uint8_t  lid = dw10 & 0xff;
> +    uint8_t  lsp = (dw10 >> 8) & 0xf;
> +    uint8_t  rae = (dw10 >> 15) & 0x1;
> +    uint32_t numdl, numdu;
> +    uint64_t off, lpol, lpou;
> +    size_t   len;
> +
> +    numdl = (dw10 >> 16);
> +    numdu = (dw11 & 0xffff);
> +    lpol = dw12;
> +    lpou = dw13;
> +
> +    len = (((numdu << 16) | numdl) + 1) << 2;
> +    off = (lpou << 32ULL) | lpol;
> +
> +    if (off & 0x3) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
> +
> +    switch (lid) {
> +    case NVME_LOG_ERROR_INFO:
> +        return nvme_error_info(n, cmd, len, off, req);
> +    case NVME_LOG_SMART_INFO:
> +        return nvme_smart_info(n, cmd, len, off, req);
> +    case NVME_LOG_FW_SLOT_INFO:
> +        return nvme_fw_log_info(n, cmd, len, off, req);
> +    default:
> +        trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +}
> +
>  static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
>  {
>      n->cq[cq->cqid] = NULL;
> @@ -960,6 +1094,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          return nvme_del_sq(n, cmd);
>      case NVME_ADM_CMD_CREATE_SQ:
>          return nvme_create_sq(n, cmd);
> +    case NVME_ADM_CMD_GET_LOG_PAGE:
> +        return nvme_get_log(n, cmd, req);
>      case NVME_ADM_CMD_DELETE_CQ:
>          return nvme_del_cq(n, cmd);
>      case NVME_ADM_CMD_CREATE_CQ:
> @@ -1511,7 +1647,9 @@ static void nvme_init_state(NvmeCtrl *n)
>      n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
>      n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
>      n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
> +    n->temperature = NVME_TEMPERATURE;
>      n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
> +    n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
>  }
>  
>  static void nvme_init_blk(NvmeCtrl *n, Error **errp)
> @@ -1668,7 +1806,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
>       */
>      id->acl = 3;
>      id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
> -    id->lpa = 1 << 0;
> +    id->lpa = NVME_LPA_EXTENDED;
>  
>      /* recommended default value (~70 C) */
>      id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index e3a2c907e210..8228978e93de 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -98,6 +98,8 @@ typedef struct NvmeCtrl {
>      uint32_t    irq_status;
>      uint64_t    host_timestamp;                 /* Timestamp sent by the host */
>      uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
> +    uint64_t    starttime_ms;
> +    uint16_t    temperature;
>  
>      HostMemoryBackend *pmrdev;
>  
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index c40c0d2e4b28..3330d74e48db 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -45,6 +45,7 @@ pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
>  pci_nvme_identify_ctrl(void) "identify controller"
>  pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
> +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
>  pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
>  pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
>  pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> @@ -94,6 +95,7 @@ pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completi
>  pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
>  pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
>  pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
> +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
>  pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
>  pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
>  pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index d639e8bbee92..49ce97ae1ab4 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -704,9 +704,9 @@ typedef struct NvmeErrorLog {
>      uint8_t     resv[35];
>  } NvmeErrorLog;
>  
> -typedef struct NvmeSmartLog {
> +typedef struct QEMU_PACKED NvmeSmartLog {
>      uint8_t     critical_warning;
> -    uint8_t     temperature[2];
> +    uint16_t    temperature;
>      uint8_t     available_spare;
>      uint8_t     available_spare_threshold;
>      uint8_t     percentage_used;
> @@ -846,6 +846,10 @@ enum NvmeIdCtrlFrmw {
>      NVME_FRMW_SLOT1_RO = 1 << 0,
>  };
>  
> +enum NvmeIdCtrlLpa {
> +    NVME_LPA_EXTENDED = 1 << 2,
> +};
> +
>  #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
>  #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
>  #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
Maxim Levitsky July 29, 2020, 10:24 a.m. UTC | #2
On Mon, 2020-07-06 at 08:12 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add support for the Get Log Page command and basic implementations of
> the mandatory Error Information, SMART / Health Information and Firmware
> Slot Information log pages.
> 
> In violation of the specification, the SMART / Health Information log
> page does not persist information over the lifetime of the controller
> because the device has no place to store such persistent state.
> 
> Note that the LPA field in the Identify Controller data structure
> intentionally has bit 0 cleared because there is no namespace specific
> information in the SMART / Health information log page.
> 
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.14 ("Get Log Page command").
> 
> Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Acked-by: Keith Busch <kbusch@kernel.org>
> ---
>  hw/block/nvme.c       | 140 +++++++++++++++++++++++++++++++++++++++++-
>  hw/block/nvme.h       |   2 +
>  hw/block/trace-events |   2 +
>  include/block/nvme.h  |   8 ++-
>  4 files changed, 149 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b6bc75eb61a2..7cb3787638f6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -606,6 +606,140 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
> +{
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +
> +    uint32_t trans_len;
> +    time_t current_ms;
> +    uint64_t units_read = 0, units_written = 0;
> +    uint64_t read_commands = 0, write_commands = 0;
> +    NvmeSmartLog smart;
> +    BlockAcctStats *s;
> +
> +    if (nsid && nsid != 0xffffffff) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
Correct.
> +
> +    s = blk_get_stats(n->conf.blk);
> +
> +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> +
> +    if (off > sizeof(smart)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trans_len = MIN(sizeof(smart) - off, buf_len);
> +
> +    memset(&smart, 0x0, sizeof(smart));
> +
> +    smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> +    smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
Tiny nitpick - the spec asks the value to be rounded up

> +    smart.host_read_commands[0] = cpu_to_le64(read_commands);
> +    smart.host_write_commands[0] = cpu_to_le64(write_commands);
> +
> +    smart.temperature = cpu_to_le16(n->temperature);
> +
> +    if ((n->temperature >= n->features.temp_thresh_hi) ||
> +        (n->temperature <= n->features.temp_thresh_low)) {
> +        smart.critical_warning |= NVME_SMART_TEMPERATURE;
> +    }
> +
> +    current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +    smart.power_on_hours[0] =
> +        cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> +                             prp2);
> +}
> +
> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                 uint64_t off, NvmeRequest *req)
> +{
> +    uint32_t trans_len;
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    NvmeFwSlotInfoLog fw_log = {
> +        .afi = 0x1,
> +    };
> +
> +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');

I always thought that the firmware log could just be zeroed out, but this is correct
now that I checked the spec again.
> +
> +    if (off > sizeof(fw_log)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> +                             prp2);
> +}
> +
> +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
> +{
> +    uint32_t trans_len;
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    NvmeErrorLog errlog;
> +
> +    if (off > sizeof(errlog)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    memset(&errlog, 0x0, sizeof(errlog));
> +
> +    trans_len = MIN(sizeof(errlog) - off, buf_len);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
Looks good.
> +}
> +
> +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> +    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
> +    uint8_t  lid = dw10 & 0xff;
> +    uint8_t  lsp = (dw10 >> 8) & 0xf;
> +    uint8_t  rae = (dw10 >> 15) & 0x1;
> +    uint32_t numdl, numdu;
> +    uint64_t off, lpol, lpou;
> +    size_t   len;
> +
Nitpick: don't we want to check NSID=0 || NSID=0xFFFFFFFF here too?

> +    numdl = (dw10 >> 16);
> +    numdu = (dw11 & 0xffff);
> +    lpol = dw12;
> +    lpou = dw13;
> +
> +    len = (((numdu << 16) | numdl) + 1) << 2;
> +    off = (lpou << 32ULL) | lpol;
> +
> +    if (off & 0x3) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
Looks OK
> +
> +    trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
> +
> +    switch (lid) {
> +    case NVME_LOG_ERROR_INFO:
> +        return nvme_error_info(n, cmd, len, off, req);
> +    case NVME_LOG_SMART_INFO:
> +        return nvme_smart_info(n, cmd, len, off, req);
> +    case NVME_LOG_FW_SLOT_INFO:
> +        return nvme_fw_log_info(n, cmd, len, off, req);
> +    default:
> +        trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +}
> +
>  static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
>  {
>      n->cq[cq->cqid] = NULL;
> @@ -960,6 +1094,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          return nvme_del_sq(n, cmd);
>      case NVME_ADM_CMD_CREATE_SQ:
>          return nvme_create_sq(n, cmd);
> +    case NVME_ADM_CMD_GET_LOG_PAGE:
> +        return nvme_get_log(n, cmd, req);
>      case NVME_ADM_CMD_DELETE_CQ:
>          return nvme_del_cq(n, cmd);
>      case NVME_ADM_CMD_CREATE_CQ:
> @@ -1511,7 +1647,9 @@ static void nvme_init_state(NvmeCtrl *n)
>      n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
>      n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
>      n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
> +    n->temperature = NVME_TEMPERATURE;
>      n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
> +    n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
>  }
>  
>  static void nvme_init_blk(NvmeCtrl *n, Error **errp)
> @@ -1668,7 +1806,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
>       */
>      id->acl = 3;
>      id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
> -    id->lpa = 1 << 0;
> +    id->lpa = NVME_LPA_EXTENDED;
>  
>      /* recommended default value (~70 C) */
>      id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index e3a2c907e210..8228978e93de 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -98,6 +98,8 @@ typedef struct NvmeCtrl {
>      uint32_t    irq_status;
>      uint64_t    host_timestamp;                 /* Timestamp sent by the host */
>      uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
> +    uint64_t    starttime_ms;
> +    uint16_t    temperature;
>  
>      HostMemoryBackend *pmrdev;
>  
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index c40c0d2e4b28..3330d74e48db 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -45,6 +45,7 @@ pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
>  pci_nvme_identify_ctrl(void) "identify controller"
>  pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
> +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
>  pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
>  pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
>  pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
> @@ -94,6 +95,7 @@ pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completi
>  pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
>  pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
>  pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
> +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
>  pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
>  pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
>  pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index d639e8bbee92..49ce97ae1ab4 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -704,9 +704,9 @@ typedef struct NvmeErrorLog {
>      uint8_t     resv[35];
>  } NvmeErrorLog;
>  
> -typedef struct NvmeSmartLog {
> +typedef struct QEMU_PACKED NvmeSmartLog {
>      uint8_t     critical_warning;
> -    uint8_t     temperature[2];
> +    uint16_t    temperature;
>      uint8_t     available_spare;
>      uint8_t     available_spare_threshold;
>      uint8_t     percentage_used;
> @@ -846,6 +846,10 @@ enum NvmeIdCtrlFrmw {
>      NVME_FRMW_SLOT1_RO = 1 << 0,
>  };
>  
> +enum NvmeIdCtrlLpa {
> +    NVME_LPA_EXTENDED = 1 << 2,
> +};
> +
>  #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
>  #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
>  #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)

Other than few nitpicks that don't matter much,

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky
Klaus Jensen July 29, 2020, 11:44 a.m. UTC | #3
On Jul 29 13:24, Maxim Levitsky wrote:
> On Mon, 2020-07-06 at 08:12 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Add support for the Get Log Page command and basic implementations of
> > the mandatory Error Information, SMART / Health Information and Firmware
> > Slot Information log pages.
> > 
> > In violation of the specification, the SMART / Health Information log
> > page does not persist information over the lifetime of the controller
> > because the device has no place to store such persistent state.
> > 
> > Note that the LPA field in the Identify Controller data structure
> > intentionally has bit 0 cleared because there is no namespace specific
> > information in the SMART / Health information log page.
> > 
> > Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> > Section 5.14 ("Get Log Page command").
> > 
> > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Acked-by: Keith Busch <kbusch@kernel.org>
> > ---
> >  hw/block/nvme.c       | 140 +++++++++++++++++++++++++++++++++++++++++-
> >  hw/block/nvme.h       |   2 +
> >  hw/block/trace-events |   2 +
> >  include/block/nvme.h  |   8 ++-
> >  4 files changed, 149 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index b6bc75eb61a2..7cb3787638f6 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -606,6 +606,140 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
> >      return NVME_SUCCESS;
> >  }
> >  
> > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > +                                uint64_t off, NvmeRequest *req)
> > +{
> > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> > +
> > +    uint32_t trans_len;
> > +    time_t current_ms;
> > +    uint64_t units_read = 0, units_written = 0;
> > +    uint64_t read_commands = 0, write_commands = 0;
> > +    NvmeSmartLog smart;
> > +    BlockAcctStats *s;
> > +
> > +    if (nsid && nsid != 0xffffffff) {
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> Correct.
> > +
> > +    s = blk_get_stats(n->conf.blk);
> > +
> > +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > +
> > +    if (off > sizeof(smart)) {
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> > +
> > +    trans_len = MIN(sizeof(smart) - off, buf_len);
> > +
> > +    memset(&smart, 0x0, sizeof(smart));
> > +
> > +    smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> > +    smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> Tiny nitpick - the spec asks the value to be rounded up
> 

Ouch. You are correct. I'll swap that for a DIV_ROUND_UP.
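
Something along these lines, I suppose (untested sketch; DIV_ROUND_UP is
the helper from include/qemu/osdep.h):

    smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(units_read, 1000));
    smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(units_written, 1000));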

> > +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > +{
> > +    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > +    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > +    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> > +    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
> > +    uint8_t  lid = dw10 & 0xff;
> > +    uint8_t  lsp = (dw10 >> 8) & 0xf;
> > +    uint8_t  rae = (dw10 >> 15) & 0x1;
> > +    uint32_t numdl, numdu;
> > +    uint64_t off, lpol, lpou;
> > +    size_t   len;
> > +
> Nitpick: don't we want to check NSID=0 || NSID=0xFFFFFFFF here too?
> 

The spec lists Get Log Page with "Yes" under "Namespace Identifier Used"
but no log pages in v1.3 or v1.4 are namespace specific so we expect
NSID to always be 0 or 0xffffffff. But, there are TPs that have
namespace specific log pages (i.e. TP 4053, Zoned Namespaces). So, it is
not invalid to have NSID set to something.

So, I think we have to defer handling of NSID values to the individual
log pages (like we do for the SMART page).
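
That is, each log page handler keeps its own check, like the one
nvme_smart_info already does in the patch above:

    if (nsid && nsid != 0xffffffff) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }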
Maxim Levitsky July 29, 2020, 6:35 p.m. UTC | #4
On Wed, 2020-07-29 at 13:44 +0200, Klaus Jensen wrote:
> On Jul 29 13:24, Maxim Levitsky wrote:
> > On Mon, 2020-07-06 at 08:12 +0200, Klaus Jensen wrote:
> > > From: Klaus Jensen <k.jensen@samsung.com>
> > > 
> > > Add support for the Get Log Page command and basic implementations of
> > > the mandatory Error Information, SMART / Health Information and Firmware
> > > Slot Information log pages.
> > > 
> > > In violation of the specification, the SMART / Health Information log
> > > page does not persist information over the lifetime of the controller
> > > because the device has no place to store such persistent state.
> > > 
> > > Note that the LPA field in the Identify Controller data structure
> > > intentionally has bit 0 cleared because there is no namespace specific
> > > information in the SMART / Health information log page.
> > > 
> > > Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> > > Section 5.14 ("Get Log Page command").
> > > 
> > > Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
> > > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > > Acked-by: Keith Busch <kbusch@kernel.org>
> > > ---
> > >  hw/block/nvme.c       | 140 +++++++++++++++++++++++++++++++++++++++++-
> > >  hw/block/nvme.h       |   2 +
> > >  hw/block/trace-events |   2 +
> > >  include/block/nvme.h  |   8 ++-
> > >  4 files changed, 149 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index b6bc75eb61a2..7cb3787638f6 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -606,6 +606,140 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
> > >      return NVME_SUCCESS;
> > >  }
> > >  
> > > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > > +                                uint64_t off, NvmeRequest *req)
> > > +{
> > > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > > +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> > > +
> > > +    uint32_t trans_len;
> > > +    time_t current_ms;
> > > +    uint64_t units_read = 0, units_written = 0;
> > > +    uint64_t read_commands = 0, write_commands = 0;
> > > +    NvmeSmartLog smart;
> > > +    BlockAcctStats *s;
> > > +
> > > +    if (nsid && nsid != 0xffffffff) {
> > > +        return NVME_INVALID_FIELD | NVME_DNR;
> > > +    }
> > Correct.
> > > +
> > > +    s = blk_get_stats(n->conf.blk);
> > > +
> > > +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > > +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > > +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > > +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > > +
> > > +    if (off > sizeof(smart)) {
> > > +        return NVME_INVALID_FIELD | NVME_DNR;
> > > +    }
> > > +
> > > +    trans_len = MIN(sizeof(smart) - off, buf_len);
> > > +
> > > +    memset(&smart, 0x0, sizeof(smart));
> > > +
> > > +    smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> > > +    smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> > Tiny nitpick - the spec asks the value to be rounded up
> > 
> 
> Ouch. You are correct. I'll swap that for a DIV_ROUND_UP.
Not a big deal though, as these values don't matter much to anybody since we don't have
a way of storing them permanently.

> 
> > > +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> > > +{
> > > +    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > > +    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > > +    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> > > +    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
> > > +    uint8_t  lid = dw10 & 0xff;
> > > +    uint8_t  lsp = (dw10 >> 8) & 0xf;
> > > +    uint8_t  rae = (dw10 >> 15) & 0x1;
> > > +    uint32_t numdl, numdu;
> > > +    uint64_t off, lpol, lpou;
> > > +    size_t   len;
> > > +
> > Nitpick: don't we want to check NSID=0 || NSID=0xFFFFFFFF here too?
> > 
> 
> The spec lists Get Log Page with "Yes" under "Namespace Identifier Used"
> but no log pages in v1.3 or v1.4 are namespace specific so we expect
> NSID to always be 0 or 0xffffffff. But, there are TPs that have
> namespace specific log pages (i.e. TP 4053, Zoned Namespaces). So, it is
> not invalid to have NSID set to something.
> 
> So, I think we have to defer handling of NSID values to the individual
> log pages (like we do for the SMART page).
Ah, OK.

> 


Best regards,
	Maxim Levitsky
Peter Maydell Sept. 29, 2020, 1:11 p.m. UTC | #5
On Mon, 6 Jul 2020 at 07:15, Klaus Jensen <its@irrelevant.dk> wrote:
>
> From: Klaus Jensen <k.jensen@samsung.com>
>
> Add support for the Get Log Page command and basic implementations of
> the mandatory Error Information, SMART / Health Information and Firmware
> Slot Information log pages.
>
> In violation of the specification, the SMART / Health Information log
> page does not persist information over the lifetime of the controller
> because the device has no place to store such persistent state.
>
> Note that the LPA field in the Identify Controller data structure
> intentionally has bit 0 cleared because there is no namespace specific
> information in the SMART / Health information log page.
>
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.14 ("Get Log Page command").

Hi; Coverity reports a potential issue in this code
(CID 1432413):

> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
> +{
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +
> +    uint32_t trans_len;
> +    time_t current_ms;
> +    uint64_t units_read = 0, units_written = 0;
> +    uint64_t read_commands = 0, write_commands = 0;
> +    NvmeSmartLog smart;
> +    BlockAcctStats *s;
> +
> +    if (nsid && nsid != 0xffffffff) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    s = blk_get_stats(n->conf.blk);
> +
> +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> +
> +    if (off > sizeof(smart)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }

Here we check for off > sizeof(smart), which means that we allow
off == sizeof(smart)...

> +
> +    trans_len = MIN(sizeof(smart) - off, buf_len);

> +    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> +                             prp2);

...in which case the pointer we pass to nvme_dma_read_prp() will
be off the end of the 'smart' object.

Now we are passing 0 as the trans_len, so I *think* this function
will not actually read the buffer (Coverity is not smart
enough to see this); so I could just close the Coverity issue as
a false-positive. But maybe there is a clearer-to-humans as well
as clearer-to-Coverity way to write this. What do you think ?
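
For instance, something along these lines might be clearer to both (a
rough, untested sketch, assuming a zero-length transfer should simply
succeed):

    trans_len = MIN(sizeof(smart) - off, buf_len);
    if (!trans_len) {
        return NVME_SUCCESS;
    }

That way we never even form the (uint8_t *) &smart + off pointer when
there is nothing to copy.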

> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +                                 uint64_t off, NvmeRequest *req)
> +{
> +    uint32_t trans_len;
> +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +    NvmeFwSlotInfoLog fw_log = {
> +        .afi = 0x1,
> +    };
> +
> +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> +
> +    if (off > sizeof(fw_log)) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> +
> +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> +                             prp2);

Coverity warns about the same structure here (CID 1432411).

thanks
-- PMM
Klaus Jensen Sept. 29, 2020, 9:46 p.m. UTC | #6
On Sep 29 14:11, Peter Maydell wrote:
> On Mon, 6 Jul 2020 at 07:15, Klaus Jensen <its@irrelevant.dk> wrote:
> >
> > From: Klaus Jensen <k.jensen@samsung.com>
> >
> > Add support for the Get Log Page command and basic implementations of
> > the mandatory Error Information, SMART / Health Information and Firmware
> > Slot Information log pages.
> >
> > In violation of the specification, the SMART / Health Information log
> > page does not persist information over the lifetime of the controller
> > because the device has no place to store such persistent state.
> >
> > Note that the LPA field in the Identify Controller data structure
> > intentionally has bit 0 cleared because there is no namespace specific
> > information in the SMART / Health information log page.
> >
> > Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> > Section 5.14 ("Get Log Page command").
> 
> Hi; Coverity reports a potential issue in this code
> (CID 1432413):
> 
> > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > +                                uint64_t off, NvmeRequest *req)
> > +{
> > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> > +
> > +    uint32_t trans_len;
> > +    time_t current_ms;
> > +    uint64_t units_read = 0, units_written = 0;
> > +    uint64_t read_commands = 0, write_commands = 0;
> > +    NvmeSmartLog smart;
> > +    BlockAcctStats *s;
> > +
> > +    if (nsid && nsid != 0xffffffff) {
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> > +
> > +    s = blk_get_stats(n->conf.blk);
> > +
> > +    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > +    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > +    read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > +    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > +
> > +    if (off > sizeof(smart)) {
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> 
> Here we check for off > sizeof(smart), which means that we allow
> off == sizeof(smart)...
> 
> > +
> > +    trans_len = MIN(sizeof(smart) - off, buf_len);
> 
> > +    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> > +                             prp2);
> 
> ...in which case the pointer we pass to nvme_dma_read_prp() will
> be off the end of the 'smart' object.
> 
> Now we are passing 0 as the trans_len, so I *think* this function
> will not actually read the buffer (Coverity is not smart
> enough to see this); so I could just close the Coverity issue as
> a false-positive. But maybe there is a clearer-to-humans as well
> as clearer-to-Coverity way to write this. What do you think ?
> 
> > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > +                                 uint64_t off, NvmeRequest *req)
> > +{
> > +    uint32_t trans_len;
> > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +    NvmeFwSlotInfoLog fw_log = {
> > +        .afi = 0x1,
> > +    };
> > +
> > +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> > +
> > +    if (off > sizeof(fw_log)) {
> > +        return NVME_INVALID_FIELD | NVME_DNR;
> > +    }
> > +
> > +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> > +
> > +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> > +                             prp2);
> 
> Coverity warns about the same structure here (CID 1432411).
> 
> thanks
> -- PMM

Hi Peter,

Thanks. This is somewhere in the middle of a bunch of patches I got
merged I think, commit 94a7897c41db? I just requested Coverity access.

What happens is that nvme_dma_read_prp will call into nvme_map_prp which
won't map anything because len is 0. This will cause the statically
allocated QEMUSGList and QEMUIOVector in the request to be
uninitialized. Returning from nvme_map_prp, nvme_dma_read_prp will
notice that req->qsg.nsg is zero so it will default to the iov and move
into qemu_iovec_{to,from}_buf(&req->iov, ...). In there we actually pass
the NULL struct iovec, but since there is a __builtin_constant_p(bytes)
condition at the end of it all, we never follow it.

Not "serious" I think, but definitely not good. We will of course fix
this up.

@keith, do you agree with my analysis?
Keith Busch Sept. 29, 2020, 10:34 p.m. UTC | #7
On Tue, Sep 29, 2020 at 11:46:00PM +0200, Klaus Jensen wrote:
> On Sep 29 14:11, Peter Maydell wrote:
> > > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > > +                                 uint64_t off, NvmeRequest *req)
> > > +{
> > > +    uint32_t trans_len;
> > > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > > +    NvmeFwSlotInfoLog fw_log = {
> > > +        .afi = 0x1,
> > > +    };
> > > +
> > > +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> > > +
> > > +    if (off > sizeof(fw_log)) {
> > > +        return NVME_INVALID_FIELD | NVME_DNR;
> > > +    }
> > > +
> > > +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> > > +
> > > +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> > > +                             prp2);
> > 
> > Coverity warns about the same structure here (CID 1432411).
> > 
> > thanks
> > -- PMM
> 
> Hi Peter,
> 
> Thanks. This is somewhere in the middle of a bunch of patches I got
> merged I think, commit 94a7897c41db? I just requested Coverity access.
> 
> What happens is that nvme_dma_read_prp will call into nvme_map_prp which
> won't map anything because len is 0. This will cause the statically
> allocated QEMUSGList and QEMUIOVector in the request to be
> uninitialized. Returning from nvme_map_prp, nvme_dma_read_prp will
> notice that req->qsg.nsg is zero so it will default to the iov and move
> into qemu_iovec_{to,from}_buf(&req->iov, ...). In there we actually pass
> the NULL struct iovec, but since there is a __builtin_constant_p(bytes)
> condition at the end of it all, we never follow it.
> 
> Not "serious" I think, but definitely not good. We will of course fix
> this up.
> 
> @keith, do you agree with my analysis?

Yeah, looks safe as-is, but we're missing out on returning the spec
required 'Invalid Field'.
Klaus Jensen Sept. 29, 2020, 10:42 p.m. UTC | #8
On Sep 29 15:34, Keith Busch wrote:
> On Tue, Sep 29, 2020 at 11:46:00PM +0200, Klaus Jensen wrote:
> > On Sep 29 14:11, Peter Maydell wrote:
> > > > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> > > > +                                 uint64_t off, NvmeRequest *req)
> > > > +{
> > > > +    uint32_t trans_len;
> > > > +    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > > > +    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > > > +    NvmeFwSlotInfoLog fw_log = {
> > > > +        .afi = 0x1,
> > > > +    };
> > > > +
> > > > +    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> > > > +
> > > > +    if (off > sizeof(fw_log)) {
> > > > +        return NVME_INVALID_FIELD | NVME_DNR;
> > > > +    }
> > > > +
> > > > +    trans_len = MIN(sizeof(fw_log) - off, buf_len);
> > > > +
> > > > +    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> > > > +                             prp2);
> > > 
> > > Coverity warns about the same structure here (CID 1432411).
> > > 
> > > thanks
> > > -- PMM
> > 
> > Hi Peter,
> > 
> > Thanks. This is somewhere in the middle of a bunch of patches I got
> > merged I think, commit 94a7897c41db? I just requested Coverity access.
> > 
> > What happens is that nvme_dma_read_prp will call into nvme_map_prp which
> > won't map anything because len is 0. This will cause the statically
> > allocated QEMUSGList and QEMUIOVector in the request to be
> > uninitialized. Returning from nvme_map_prp, nvme_dma_read_prp will
> > notice that req->qsg.nsg is zero so it will default to the iov and move
> > into qemu_iovec_{to,from}_buf(&req->iov, ...). In there we actually pass
> > the NULL struct iovec, but since there is a __builtin_constant_p(bytes)
> > condition at the end of it all, we never follow it.
> > 
> > Not "serious" I think, but definitely not good. We will of course fix
> > this up.
> > 
> > @keith, do you agree with my analysis?
> 
> Yeah, looks safe as-is, but we're missing out on returning the spec
> required 'Invalid Field'.

I can't see where it says that we should do that? Invalid Field in
Command if offset is *greater* than the size of the log page.

Some dynamic log pages have side-effects of being read, so while this is
a super weird way of specifying that we want nothing returned, I think
it is valid?
Keith Busch Sept. 29, 2020, 10:57 p.m. UTC | #9
On Wed, Sep 30, 2020 at 12:42:48AM +0200, Klaus Jensen wrote:
> On Sep 29 15:34, Keith Busch wrote:
> > Yeah, looks safe as-is, but we're missing out on returning the spec
> > required 'Invalid Field'.
> 
> I can't see where it says that we should do that? Invalid Field in
> Command if offset is *greater* than the size of the log page.
> 
> Some dynamic log pages have side-effects of being read, so while this is
> a super weird way of specifying that we want nothing returned, I think
> it is valid?

Eh, when spec says "size of the log page", I assume they're using the
"zeroes based" definition for size as aligned with the NUMD field. So
512 is bigger than the sizeof the smart log occupying bytes 0-511.

But I guess there's room to see it the other way, so maybe it is a
way to request a no data log.
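
(For reference, NUMD is a 0's based dword count, so a host reading the
whole 512-byte SMART log sets NUMDU = 0 and NUMDL = 0x7f, giving

    len = (((0 << 16) | 0x7f) + 1) << 2 = 512

i.e. bytes 0-511 with LPOL/LPOU = 0; an offset of 512 then points just
past the end of the page.)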

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b6bc75eb61a2..7cb3787638f6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -606,6 +606,140 @@  static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+                                uint64_t off, NvmeRequest *req)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+    uint32_t nsid = le32_to_cpu(cmd->nsid);
+
+    uint32_t trans_len;
+    time_t current_ms;
+    uint64_t units_read = 0, units_written = 0;
+    uint64_t read_commands = 0, write_commands = 0;
+    NvmeSmartLog smart;
+    BlockAcctStats *s;
+
+    if (nsid && nsid != 0xffffffff) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    s = blk_get_stats(n->conf.blk);
+
+    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
+    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
+    read_commands = s->nr_ops[BLOCK_ACCT_READ];
+    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
+
+    if (off > sizeof(smart)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(sizeof(smart) - off, buf_len);
+
+    memset(&smart, 0x0, sizeof(smart));
+
+    smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
+    smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
+    smart.host_read_commands[0] = cpu_to_le64(read_commands);
+    smart.host_write_commands[0] = cpu_to_le64(write_commands);
+
+    smart.temperature = cpu_to_le16(n->temperature);
+
+    if ((n->temperature >= n->features.temp_thresh_hi) ||
+        (n->temperature <= n->features.temp_thresh_low)) {
+        smart.critical_warning |= NVME_SMART_TEMPERATURE;
+    }
+
+    current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+    smart.power_on_hours[0] =
+        cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
+
+    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
+                             prp2);
+}
+
+static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+                                 uint64_t off, NvmeRequest *req)
+{
+    uint32_t trans_len;
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+    NvmeFwSlotInfoLog fw_log = {
+        .afi = 0x1,
+    };
+
+    strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
+
+    if (off > sizeof(fw_log)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(sizeof(fw_log) - off, buf_len);
+
+    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
+                             prp2);
+}
+
+static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+                                uint64_t off, NvmeRequest *req)
+{
+    uint32_t trans_len;
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+    NvmeErrorLog errlog;
+
+    if (off > sizeof(errlog)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    memset(&errlog, 0x0, sizeof(errlog));
+
+    trans_len = MIN(sizeof(errlog) - off, buf_len);
+
+    return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
+}
+
+static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
+    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+    uint8_t  lid = dw10 & 0xff;
+    uint8_t  lsp = (dw10 >> 8) & 0xf;
+    uint8_t  rae = (dw10 >> 15) & 0x1;
+    uint32_t numdl, numdu;
+    uint64_t off, lpol, lpou;
+    size_t   len;
+
+    numdl = (dw10 >> 16);
+    numdu = (dw11 & 0xffff);
+    lpol = dw12;
+    lpou = dw13;
+
+    len = (((numdu << 16) | numdl) + 1) << 2;
+    off = (lpou << 32ULL) | lpol;
+
+    if (off & 0x3) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
+
+    switch (lid) {
+    case NVME_LOG_ERROR_INFO:
+        return nvme_error_info(n, cmd, len, off, req);
+    case NVME_LOG_SMART_INFO:
+        return nvme_smart_info(n, cmd, len, off, req);
+    case NVME_LOG_FW_SLOT_INFO:
+        return nvme_fw_log_info(n, cmd, len, off, req);
+    default:
+        trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+}
+
 static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 {
     n->cq[cq->cqid] = NULL;
@@ -960,6 +1094,8 @@  static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return nvme_del_sq(n, cmd);
     case NVME_ADM_CMD_CREATE_SQ:
         return nvme_create_sq(n, cmd);
+    case NVME_ADM_CMD_GET_LOG_PAGE:
+        return nvme_get_log(n, cmd, req);
     case NVME_ADM_CMD_DELETE_CQ:
         return nvme_del_cq(n, cmd);
     case NVME_ADM_CMD_CREATE_CQ:
@@ -1511,7 +1647,9 @@  static void nvme_init_state(NvmeCtrl *n)
     n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
     n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
     n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
+    n->temperature = NVME_TEMPERATURE;
     n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
+    n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
 }
 
 static void nvme_init_blk(NvmeCtrl *n, Error **errp)
@@ -1668,7 +1806,7 @@  static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
      */
     id->acl = 3;
     id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
-    id->lpa = 1 << 0;
+    id->lpa = NVME_LPA_EXTENDED;
 
     /* recommended default value (~70 C) */
     id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e3a2c907e210..8228978e93de 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -98,6 +98,8 @@  typedef struct NvmeCtrl {
     uint32_t    irq_status;
     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
+    uint64_t    starttime_ms;
+    uint16_t    temperature;
 
     HostMemoryBackend *pmrdev;
 
diff --git a/hw/block/trace-events b/hw/block/trace-events
index c40c0d2e4b28..3330d74e48db 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -45,6 +45,7 @@  pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
 pci_nvme_identify_ctrl(void) "identify controller"
 pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
 pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
+pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
 pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
 pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
 pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
@@ -94,6 +95,7 @@  pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completi
 pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
 pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
 pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
 pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
 pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
 pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
diff --git a/include/block/nvme.h b/include/block/nvme.h
index d639e8bbee92..49ce97ae1ab4 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -704,9 +704,9 @@  typedef struct NvmeErrorLog {
     uint8_t     resv[35];
 } NvmeErrorLog;
 
-typedef struct NvmeSmartLog {
+typedef struct QEMU_PACKED NvmeSmartLog {
     uint8_t     critical_warning;
-    uint8_t     temperature[2];
+    uint16_t    temperature;
     uint8_t     available_spare;
     uint8_t     available_spare_threshold;
     uint8_t     percentage_used;
@@ -846,6 +846,10 @@  enum NvmeIdCtrlFrmw {
     NVME_FRMW_SLOT1_RO = 1 << 0,
 };
 
+enum NvmeIdCtrlLpa {
+    NVME_LPA_EXTENDED = 1 << 2,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)