From patchwork Fri Jul  5 07:23:18 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Cc: qemu-devel@nongnu.org, kwolf@redhat.com, mreitz@redhat.com,
    keith.busch@intel.com, armbru@redhat.com, lersek@redhat.com,
    matt.fitzpatrick@oakgatetech.com
Date: Fri, 5 Jul 2019 09:23:18 +0200
Message-Id: <20190705072333.17171-2-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 01/16] nvme: simplify namespace code

The device model currently supports only a single namespace and
specifically sets num_namespaces to 1. Take this into account and
simplify the code.
Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 26 +++++++-------------------
 hw/block/nvme.h |  2 +-
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 36d6a8bb3a3e..28ebaf1368b1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -424,7 +424,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return NVME_INVALID_NSID | NVME_DNR;
     }

-    ns = &n->namespaces[nsid - 1];
+    ns = &n->namespace;
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
         return nvme_flush(n, ns, cmd, req);
@@ -670,7 +670,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
         return NVME_INVALID_NSID | NVME_DNR;
     }

-    ns = &n->namespaces[nsid - 1];
+    ns = &n->namespace;

     return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
         prp1, prp2);
@@ -1306,8 +1306,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
     NvmeCtrl *n = NVME(pci_dev);
     NvmeIdCtrl *id = &n->id_ctrl;
+    NvmeIdNs *id_ns = &n->namespace.id_ns;

-    int i;
     int64_t bs_size;
     uint8_t *pci_conf;

@@ -1347,7 +1347,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
     n->ns_size = bs_size / (uint64_t)n->num_namespaces;

-    n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
     n->sq = g_new0(NvmeSQueue *, n->num_queues);
     n->cq = g_new0(NvmeCQueue *, n->num_queues);

@@ -1416,20 +1415,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     }

-    for (i = 0; i < n->num_namespaces; i++) {
-        NvmeNamespace *ns = &n->namespaces[i];
-        NvmeIdNs *id_ns = &ns->id_ns;
-        id_ns->nsfeat = 0;
-        id_ns->nlbaf = 0;
-        id_ns->flbas = 0;
-        id_ns->mc = 0;
-        id_ns->dpc = 0;
-        id_ns->dps = 0;
-        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
-        id_ns->ncap = id_ns->nuse = id_ns->nsze =
-            cpu_to_le64(n->ns_size >>
-                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
-    }
+    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+    id_ns->ncap = id_ns->nuse = id_ns->nsze =
+        cpu_to_le64(n->ns_size >>
+            id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)].ds);
 }

 static void nvme_exit(PCIDevice *pci_dev)
@@ -1437,7 +1426,6 @@ static void nvme_exit(PCIDevice *pci_dev)
     NvmeCtrl *n = NVME(pci_dev);

     nvme_clear_ctrl(n);
-    g_free(n->namespaces);
     g_free(n->cq);
     g_free(n->sq);

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 557194ee1954..40cedb1ec932 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -83,7 +83,7 @@ typedef struct NvmeCtrl {
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */

     char            *serial;
-    NvmeNamespace   *namespaces;
+    NvmeNamespace   namespace;
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
     NvmeSQueue      admin_sq;
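
Side note for readers: with num_namespaces fixed at 1, the NSID check
guarding nvme_io_cmd() and nvme_identify_ns() effectively reduces to
requiring nsid == 1. A minimal sketch of that invariant (the helper
name nvme_nsid_valid is hypothetical, not part of this patch):

    static inline bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
    {
        /* NSIDs are 1-based; with n->num_namespaces == 1 this is nsid == 1 */
        return nsid >= 1 && nsid <= n->num_namespaces;
    }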
From patchwork Fri Jul  5 07:23:19 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:19 +0200
Message-Id: <20190705072333.17171-3-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 02/16] nvme: move device parameters to separate struct

Move the device configuration parameters into a separate struct to make
it explicit what is configurable and what is set internally. Also clean
up some includes.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 54 +++++++++++++++++++++++--------------------------
 hw/block/nvme.h | 16 ++++++++++++---
 2 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 28ebaf1368b1..a3f83f3c2135 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -27,18 +27,14 @@

 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
 #include "hw/block/block.h"
-#include "hw/hw.h"
 #include "hw/pci/msix.h"
-#include "hw/pci/pci.h"
 #include "sysemu/sysemu.h"
-#include "qapi/error.h"
-#include "qapi/visitor.h"
 #include "sysemu/block-backend.h"
+#include "qapi/error.h"

-#include "qemu/log.h"
-#include "qemu/module.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 #include "nvme.h"

@@ -63,12 +59,12 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)

 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
-    return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
+    return sqid < n->params.num_queues && n->sq[sqid] != NULL ?
+        0 : -1;
 }

 static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
 {
-    return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1;
+    return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 0 : -1;
 }

 static void nvme_inc_cq_tail(NvmeCQueue *cq)
@@ -630,7 +626,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
         trace_nvme_err_invalid_create_cq_addr(prp1);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (unlikely(vector > n->num_queues)) {
+    if (unlikely(vector > n->params.num_queues)) {
         trace_nvme_err_invalid_create_cq_vector(vector);
         return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
     }
@@ -782,7 +778,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
         break;
     case NVME_NUMBER_OF_QUEUES:
-        result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
+        result = cpu_to_le32((n->params.num_queues - 2) |
+            ((n->params.num_queues - 2) << 16));
         trace_nvme_getfeat_numq(result);
         break;
     case NVME_TIMESTAMP:
@@ -827,9 +824,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_NUMBER_OF_QUEUES:
         trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
             ((dw11 >> 16) & 0xFFFF) + 1,
-            n->num_queues - 1, n->num_queues - 1);
-        req->cqe.result =
-            cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
+            n->params.num_queues - 1,
+            n->params.num_queues - 1);
+        req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
+            ((n->params.num_queues - 2) << 16));
         break;
     case NVME_TIMESTAMP:
@@ -903,12 +901,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n)

     blk_drain(n->conf.blk);

-    for (i = 0; i < n->num_queues; i++) {
+    for (i = 0; i < n->params.num_queues; i++) {
         if (n->sq[i] != NULL) {
             nvme_free_sq(n->sq[i], n);
         }
     }
-    for (i = 0; i < n->num_queues; i++) {
+    for (i = 0; i < n->params.num_queues; i++) {
         if (n->cq[i] != NULL) {
             nvme_free_cq(n->cq[i], n);
         }
     }
@@ -1311,7 +1309,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     int64_t bs_size;
     uint8_t *pci_conf;

-    if (!n->num_queues) {
+    if (!n->params.num_queues) {
         error_setg(errp, "num_queues can't be zero");
         return;
     }
@@ -1327,7 +1325,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
         return;
     }

-    if (!n->serial) {
+    if (!n->params.serial) {
         error_setg(errp, "serial property not set");
         return;
     }
@@ -1344,24 +1342,24 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     pcie_endpoint_cap_init(pci_dev, 0x80);

     n->num_namespaces = 1;
-    n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
+    n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
     n->ns_size = bs_size / (uint64_t)n->num_namespaces;

-    n->sq = g_new0(NvmeSQueue *, n->num_queues);
-    n->cq = g_new0(NvmeCQueue *, n->num_queues);
+    n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
+    n->cq = g_new0(NvmeCQueue *, n->params.num_queues);

     memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
         "nvme", n->reg_size);
     pci_register_bar(pci_dev, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
-    msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL);
+    msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);

     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
     strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
     strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
-    strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' ');
+    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
     id->rab = 6;
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
@@ -1390,7 +1388,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     n->bar.vs = 0x00010200;
     n->bar.intmc = n->bar.intms = 0;

-    if (n->cmb_size_mb) {
+    if (n->params.cmb_size_mb) {

         NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
         NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
@@ -1401,7 +1399,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
         NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb);
+        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);

         n->cmbloc = n->bar.cmbloc;
         n->cmbsz = n->bar.cmbsz;
@@ -1429,7 +1427,7 @@ static void nvme_exit(PCIDevice *pci_dev)
     g_free(n->cq);
     g_free(n->sq);

-    if (n->cmb_size_mb) {
+    if (n->params.cmb_size_mb) {
         g_free(n->cmbuf);
     }
     msix_uninit_exclusive_bar(pci_dev);
@@ -1437,9 +1435,7 @@

 static Property nvme_props[] = {
     DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
-    DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
-    DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
-    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
+    DEFINE_NVME_PROPERTIES(NvmeCtrl, params),
     DEFINE_PROP_END_OF_LIST(),
 };

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 40cedb1ec932..77fe6fb46b71 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -1,7 +1,19 @@
 #ifndef HW_NVME_H
 #define HW_NVME_H
+
 #include "block/nvme.h"

+#define DEFINE_NVME_PROPERTIES(_state, _props) \
+    DEFINE_PROP_STRING("serial", _state, _props.serial), \
+    DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
+    DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)
+
+typedef struct NvmeParams {
+    char        *serial;
+    uint32_t    num_queues;
+    uint32_t    cmb_size_mb;
+} NvmeParams;
+
 typedef struct NvmeAsyncEvent {
     QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry;
     NvmeAerResult result;
@@ -63,6 +75,7 @@ typedef struct NvmeCtrl {
     MemoryRegion ctrl_mem;
     NvmeBar     bar;
     BlockConf   conf;
+    NvmeParams  params;

     uint32_t    page_size;
     uint16_t    page_bits;
@@ -71,10 +84,8 @@ typedef struct NvmeCtrl {
     uint16_t    sqe_size;
     uint32_t    reg_size;
     uint32_t    num_namespaces;
-    uint32_t    num_queues;
     uint32_t    max_q_ents;
     uint64_t    ns_size;
-    uint32_t    cmb_size_mb;
     uint32_t    cmbsz;
     uint32_t    cmbloc;
     uint8_t     *cmbuf;
@@ -82,7 +93,6 @@ typedef struct NvmeCtrl {
     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */

-    char            *serial;
     NvmeNamespace   namespace;
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
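
Usage note: the externally visible properties are unchanged by this
patch, so a device is configured exactly as before; the values below
are examples only, not taken from the patch:

    qemu-system-x86_64 \
        -drive file=nvme.img,if=none,id=nvm,format=raw \
        -device nvme,drive=nvm,serial=deadbeef,num_queues=64,cmb_size_mb=2

The difference is purely internal: the property-backed fields now live
in NvmeCtrl.params instead of directly in NvmeCtrl.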
From patchwork Fri Jul  5 07:23:20 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:20 +0200
Message-Id: <20190705072333.17171-4-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 03/16] nvme: fix lpa field

The Log Page Attributes (LPA) field in the Identify Controller
structure indicates that the controller supports the SMART / Health
Information log page on a per-namespace basis. It does not, given that
neither this log page nor the Get Log Page command is implemented.
Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a3f83f3c2135..ce2e5365385b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1366,7 +1366,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     id->ieee[2] = 0xb3;
     id->oacs = cpu_to_le16(0);
     id->frmw = 7 << 1;
-    id->lpa = 1 << 0;
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
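
For context (per NVM Express 1.2.1, Figure 90): bit 0 of LPA advertises
that the SMART / Health Information log page is available on a
per-namespace basis via Get Log Page. Expressed as a sketch (the macro
name is illustrative, not from the tree):

    #define NVME_LPA_SMART_PER_NS (1 << 0)  /* per-namespace SMART/Health log */

Clearing the bit is the honest choice until Get Log Page is implemented.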
From patchwork Fri Jul  5 07:23:21 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:21 +0200
Message-Id: <20190705072333.17171-5-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 04/16] nvme: add missing fields in identify controller

Not used by the device model, but added for completeness. See NVM
Express 1.2.1, Section 5.11 ("Identify command"), Figure 90.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 include/block/nvme.h | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 3ec8efcc435e..1b0accd4fe2b 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -543,7 +543,13 @@ typedef struct NvmeIdCtrl {
     uint8_t     ieee[3];
     uint8_t     cmic;
     uint8_t     mdts;
-    uint8_t     rsvd255[178];
+    uint16_t    cntlid;
+    uint32_t    ver;
+    uint16_t    rtd3r;
+    uint32_t    rtd3e;
+    uint32_t    oaes;
+    uint32_t    ctratt;
+    uint8_t     rsvd255[156];
     uint16_t    oacs;
     uint8_t     acl;
     uint8_t     aerl;
@@ -551,10 +557,22 @@ typedef struct NvmeIdCtrl {
     uint8_t     lpa;
     uint8_t     elpe;
     uint8_t     npss;
-    uint8_t     rsvd511[248];
+    uint8_t     avscc;
+    uint8_t     apsta;
+    uint16_t    wctemp;
+    uint16_t    cctemp;
+    uint16_t    mtfa;
+    uint32_t    hmpre;
+    uint32_t    hmmin;
+    uint8_t     tnvmcap[16];
+    uint8_t     unvmcap[16];
+    uint32_t    rpmbs;
+    uint8_t     rsvd319[4];
+    uint16_t    kas;
+    uint8_t     rsvd511[190];
     uint8_t     sqes;
     uint8_t     cqes;
-    uint16_t    rsvd515;
+    uint16_t    maxcmd;
     uint32_t    nn;
     uint16_t    oncs;
     uint16_t    fuses;
@@ -562,8 +580,14 @@ typedef struct NvmeIdCtrl {
     uint8_t     vwc;
     uint16_t    awun;
     uint16_t    awupf;
-    uint8_t     rsvd703[174];
-    uint8_t     rsvd2047[1344];
+    uint8_t     nvscc;
+    uint8_t     rsvd531;
+    uint16_t    acwu;
+    uint16_t    rsvd535;
+    uint32_t    sgls;
+    uint8_t     rsvd767[228];
+    uint8_t     subnqn[256];
+    uint8_t     rsvd2047[1024];
     NvmePSD     psd[32];
     uint8_t     vs[1024];
 } NvmeIdCtrl;
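
The Identify Controller data structure is exactly 4096 bytes, so
resizing the reserved ranges is easy to get wrong. A compile-time guard
along these lines would catch a miscount (a suggestion, not part of the
patch; QEMU_BUILD_BUG_ON is QEMU's static assert):

    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);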
From patchwork Fri Jul  5 07:23:22 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:22 +0200
Message-Id: <20190705072333.17171-6-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 05/16] nvme: populate the mandatory subnqn and ver fields

Required for compliance with NVMe revision 1.2.1 or later. See NVM
Express 1.2.1, Section 5.11 ("Identify command"), Figure 90 and Section
7.9 ("NVMe Qualified Names"). This also bumps the supported version to
1.2.1.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ce2e5365385b..3c392dc336a8 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1364,12 +1364,18 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
     id->ieee[2] = 0xb3;
+    id->ver = cpu_to_le32(0x00010201);
     id->oacs = cpu_to_le16(0);
     id->frmw = 7 << 1;
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
     id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
+
+    strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:");
+    qemu_uuid_unparse(&qemu_uuid,
+        (char *) id->subnqn + strlen((char *) id->subnqn));
+
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
@@ -1384,7 +1390,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     NVME_CAP_SET_CSS(n->bar.cap, 1);
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);

-    n->bar.vs = 0x00010200;
+    n->bar.vs = 0x00010201;
     n->bar.intmc = n->bar.intms = 0;

     if (n->params.cmb_size_mb) {
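
For reference, the resulting subsystem NQN follows the UUID form from
NVM Express 1.2.1, Section 7.9 (the UUID below is illustrative):

    nqn.2014-08.org.nvmexpress:uuid:12345678-1234-1234-1234-123456789abc

A Linux guest can inspect it with nvme-cli, e.g. `nvme id-ctrl
/dev/nvme0`, which prints the subnqn field.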
From patchwork Fri Jul  5 07:23:23 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:23 +0200
Message-Id: <20190705072333.17171-7-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 06/16] nvme: support completion queue in cmb

Allow completion queues to be placed in the controller memory buffer
(CMB). While not particularly useful on its own, it can be useful for
testing.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3c392dc336a8..b31e5ff681bd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -57,6 +57,16 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
     }
 }

+static void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+    if (n->cmbsz && addr >= n->ctrl_mem.addr &&
+        addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
+        memcpy((void *)&n->cmbuf[addr - n->ctrl_mem.addr], buf, size);
+        return;
+    }
+
+    pci_dma_write(&n->parent_obj, addr, buf, size);
+}
+
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
     return sqid < n->params.num_queues && n->sq[sqid] != NULL ?
         0 : -1;
@@ -276,6 +286,7 @@ static void nvme_post_cqes(void *opaque)

     QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
         NvmeSQueue *sq;
+        NvmeCqe *cqe = &req->cqe;
         hwaddr addr;

         if (nvme_cq_full(cq)) {
@@ -289,8 +300,7 @@ static void nvme_post_cqes(void *opaque)
         req->cqe.sq_head = cpu_to_le16(sq->head);
         addr = cq->dma_addr + cq->tail * n->cqe_size;
         nvme_inc_cq_tail(cq);
-        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
-            sizeof(req->cqe));
+        nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe));
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
     }
     if (cq->tail != cq->head) {
@@ -1399,7 +1409,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
         NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);

         NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
+        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
         NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
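
Orientation on the CMBSZ values already set in realize: SZU = 2 selects
a 1 MiB size unit, so the exposed CMB size is cmb_size_mb megabytes.
The arithmetic, per the CMBSZ register definition in the spec:

    /* cmb_bytes = SZ * (4 KiB << (4 * SZU));  SZU == 2 -> 1 MiB units */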
From patchwork Fri Jul  5 07:23:24 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:24 +0200
Message-Id: <20190705072333.17171-8-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 07/16] nvme: support Abort command

Required for compliance with NVMe revision 1.2.1. See NVM Express
1.2.1, Section 5.1 ("Abort command").

Extracted from Keith's qemu-nvme tree. Modified to consider only queued
(not executing) commands.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b31e5ff681bd..4b9ff51868c0 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -38,6 +38,7 @@
 #include "trace.h"
 #include "nvme.h"

+#define NVME_OP_ABORTED 0xff
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
         (trace_##trace)(__VA_ARGS__); \
@@ -848,6 +849,54 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
+
+    return NVME_SUCCESS;
+}
+
+static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    NvmeSQueue *sq;
+    NvmeRequest *new;
+    uint32_t index = 0;
+    uint16_t sqid = cmd->cdw10 & 0xffff;
+    uint16_t cid = (cmd->cdw10 >> 16) & 0xffff;
+
+    req->cqe.result = 1;
+    if (nvme_check_sqid(n, sqid)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    sq = n->sq[sqid];
+
+    /* only consider queued (and not executing) commands for abort */
+    while ((sq->head + index) % sq->size != sq->tail) {
+        NvmeCmd abort_cmd;
+        hwaddr addr;
+
+        addr = sq->dma_addr + ((sq->head + index) % sq->size) * n->sqe_size;
+
+        nvme_addr_read(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+        if (abort_cmd.cid == cid) {
+            req->cqe.result = 0;
+            new = QTAILQ_FIRST(&sq->req_list);
+            QTAILQ_REMOVE(&sq->req_list, new, entry);
+            QTAILQ_INSERT_TAIL(&sq->out_req_list, new, entry);
+
+            memset(&new->cqe, 0, sizeof(new->cqe));
+            new->cqe.cid = cid;
+            new->status = NVME_CMD_ABORT_REQ;
+
+            abort_cmd.opcode = NVME_OP_ABORTED;
+            nvme_addr_write(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+
+            nvme_enqueue_req_completion(n->cq[sq->cqid], new);
+
+            return NVME_SUCCESS;
+        }
+
+        ++index;
+    }
+
     return NVME_SUCCESS;
 }
@@ -868,6 +917,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return nvme_set_feature(n, cmd, req);
     case NVME_ADM_CMD_GET_FEATURES:
         return nvme_get_feature(n, cmd, req);
+    case NVME_ADM_CMD_ABORT:
+        return nvme_abort(n, cmd, req);
     default:
         trace_nvme_err_invalid_admin_opc(cmd->opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
@@ -890,6 +941,10 @@ static void nvme_process_sq(void *opaque)
         nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
         nvme_inc_sq_head(sq);

+        if (cmd.opcode == NVME_OP_ABORTED) {
+            continue;
+        }
+
         req = QTAILQ_FIRST(&sq->req_list);
         QTAILQ_REMOVE(&sq->req_list, req, entry);
         QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
@@ -1376,6 +1431,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     id->ieee[2] = 0xb3;
     id->ver = cpu_to_le32(0x00010201);
     id->oacs = cpu_to_le16(0);
+    id->acl = 3;
    id->frmw = 7 << 1;
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
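
Host-side view, for reference: Abort carries its target in command
dword 10, which is what nvme_abort() above decodes (the variable names
are illustrative):

    uint32_t cdw10 = (cid << 16) | sqid;  /* CID to abort, and its SQID */

Dword 0 of the completion is 1 if the command was not aborted and 0 if
it was, which is why the handler initializes req->cqe.result to 1. The
acl = 3 value is 0's based, i.e. up to four concurrent Abort commands.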
From patchwork Fri Jul  5 07:23:25 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:25 +0200
Message-Id: <20190705072333.17171-9-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 08/16] nvme: refactor device realization

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c | 196 ++++++++++++++++++++++++++++++++++--------------
 hw/block/nvme.h |  11 +++
 2 files changed, 152 insertions(+), 55 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4b9ff51868c0..eb6af6508e2d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -38,6 +38,7 @@
 #include "trace.h"
 #include "nvme.h"

+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
 #define NVME_OP_ABORTED 0xff
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
@@ -1365,66 +1366,105 @@ static const MemoryRegionOps nvme_cmb_ops = {
     },
 };

-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
 {
-    NvmeCtrl *n = NVME(pci_dev);
-    NvmeIdCtrl *id = &n->id_ctrl;
-    NvmeIdNs *id_ns = &n->namespace.id_ns;
-
-    int64_t bs_size;
-    uint8_t *pci_conf;
-
-    if (!n->params.num_queues) {
-        error_setg(errp, "num_queues can't be zero");
-        return;
-    }
+    NvmeParams *params = &n->params;

     if (!n->conf.blk) {
-        error_setg(errp, "drive property not set");
-        return;
+        error_setg(errp, "nvme: block backend not configured");
+        return 1;
     }

-    bs_size = blk_getlength(n->conf.blk);
-    if (bs_size < 0) {
-        error_setg(errp, "could not get backing file size");
-        return;
+    if (!params->serial) {
+        error_setg(errp, "nvme: serial not configured");
+        return 1;
     }

-    if (!n->params.serial) {
-        error_setg(errp, "serial property not set");
-        return;
+    if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
+        error_setg(errp, "nvme: invalid queue configuration");
+        return 1;
     }
+
+    return 0;
+}
+
+static int nvme_init_blk(NvmeCtrl *n, Error **errp)
+{
     blkconf_blocksizes(&n->conf);
     if (!blkconf_apply_backend_options(&n->conf,
         blk_is_read_only(n->conf.blk),
-        false, errp)) {
-        return;
+        false, errp)) {
+        return 1;
     }

-    pci_conf = pci_dev->config;
-    pci_conf[PCI_INTERRUPT_PIN] = 1;
-    pci_config_set_prog_interface(pci_dev->config, 0x2);
-    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
-    pcie_endpoint_cap_init(pci_dev, 0x80);
+    return 0;
+}

+static void nvme_init_state(NvmeCtrl *n)
+{
     n->num_namespaces = 1;
     n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
-    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
-
     n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
     n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+}

-    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
-        "nvme", n->reg_size);
+static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
+    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+
+    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
+    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+
+    n->cmbloc = n->bar.cmbloc;
+    n->cmbsz = n->bar.cmbsz;
+
+    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
+        "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
+        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
+        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+}
+
+static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    uint8_t *pci_conf = pci_dev->config;
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1;
+    pci_config_set_prog_interface(pci_conf, 0x2);
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(pci_conf, 0x5845);
+    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+    pcie_endpoint_cap_init(pci_dev, 0x80);
+
+    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+        n->reg_size);
     pci_register_bar(pci_dev, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
     msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);

+    if (n->params.cmb_size_mb) {
+        nvme_init_cmb(n, pci_dev);
+    }
+}
+
+static void nvme_init_ctrl(NvmeCtrl *n)
+{
+    NvmeIdCtrl *id = &n->id_ctrl;
+    NvmeParams *params = &n->params;
+    uint8_t *pci_conf = n->parent_obj.config;
+
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
     strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
     strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
-    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
     id->rab = 6;
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
@@ -1458,36 +1498,82 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     n->bar.vs = 0x00010201;
     n->bar.intmc = n->bar.intms = 0;
+}

-    if (n->params.cmb_size_mb) {
+static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    return n->ns_size / nvme_ns_lbads_bytes(ns);
+}

-        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
-        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
+{
+    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+    id_ns->ncap = id_ns->nuse = id_ns->nsze =
+        cpu_to_le64(n->ns_size >>
+            id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)].ds);
+}

-        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+    uint64_t ns_blks;
+    NvmeIdNs *id_ns = &ns->id_ns;

-        n->cmbloc = n->bar.cmbloc;
-        n->cmbsz = n->bar.cmbsz;
+    nvme_ns_init_identify(n, id_ns);

-        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
-            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
-            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
-            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+    ns_blks = nvme_ns_calc_blks(n, ns);
+    id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns_blks);
+
+    return 0;
+}
+
+static int nvme_init_namespaces(NvmeCtrl *n, Error **errp)
+{
+    int64_t bs_size;
+    Error *local_err = NULL;
+    NvmeNamespace *ns = &n->namespace;
+
+    bs_size = blk_getlength(n->conf.blk);
+    if (bs_size < 0) {
+        error_setg_errno(errp, -bs_size, "blk_getlength");
+        return 1;
     }

-    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
-    id_ns->ncap = id_ns->nuse = id_ns->nsze =
-        cpu_to_le64(n->ns_size >>
-            id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)].ds);
+    n->ns_size = bs_size / (uint64_t) n->num_namespaces;
+
+    if (nvme_init_namespace(n, ns, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
+        return 1;
+    }
+
+    return 0;
+}
+
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+    NvmeCtrl *n = NVME(pci_dev);
+    Error *local_err = NULL;
+
+    if (nvme_check_constraints(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
+        return;
+    }
+
+    nvme_init_state(n);
+
+    if (nvme_init_blk(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
+        return;
+    }
+
+    if (nvme_init_namespaces(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_namespaces: ");
+        return;
+    }
+
+    nvme_init_pci(n, pci_dev);
+    nvme_init_ctrl(n);
 }

 static void nvme_exit(PCIDevice *pci_dev)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 77fe6fb46b71..bea622ea71e0 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -101,4 +101,15 @@ typedef struct NvmeCtrl {
     NvmeIdCtrl      id_ctrl;
 } NvmeCtrl;

+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+    NvmeIdNs *id = &ns->id_ns;
+    return id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].ds;
+}
+
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+    return 1 << nvme_ns_lbads(ns);
+}
+
 #endif /* HW_NVME_H */
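
A small worked example of the new helpers: with the default LBA format,
nvme_ns_lbads() returns BDRV_SECTOR_BITS (9), nvme_ns_lbads_bytes()
returns 1 << 9 = 512, and nvme_ns_calc_blks() is just the backing size
divided by the block size:

    /* e.g. a 1 GiB backing image (value illustrative):
     * ns_blks = (1 * GiB) / 512 = 2097152 -> nsze = ncap = nuse */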
From patchwork Fri Jul  5 07:23:26 2019
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:26 +0200
Message-Id: <20190705072333.17171-10-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 09/16] nvme: support Asynchronous Event Request command

Required for compliance with NVMe revision 1.2.1. See NVM Express
1.2.1, Section 5.2 ("Asynchronous Event Request command").

Modified from Keith's qemu-nvme tree.

Signed-off-by: Klaus Birkelund Jensen <klaus@birkelund.eu>
---
 hw/block/nvme.c       | 88 ++++++++++++++++++++++++++++++++++++++++++-
 hw/block/nvme.h       |  7 ++++
 hw/block/trace-events |  7 ++++
 3 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index eb6af6508e2d..a20576654f1b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -39,6 +39,7 @@
 #include "nvme.h"

 #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_AERL 3
 #define NVME_OP_ABORTED 0xff
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
@@ -318,6 +319,51 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
     timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
 }

+static void nvme_process_aers(void *opaque)
+{
+    NvmeCtrl *n = opaque;
+    NvmeRequest *req;
+    NvmeAerResult *result;
+    NvmeAsyncEvent *event, *next;
+
+    trace_nvme_process_aers();
+
+    QSIMPLEQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+        /* can't post cqe if there is nothing to complete */
+        if (!n->outstanding_aers) {
+            trace_nvme_no_outstanding_aers();
+            break;
+        }
+
+        /* ignore if masked (cqe posted, but event not cleared) */
+        if (n->aer_mask & (1 << event->result.event_type)) {
+            trace_nvme_aer_masked(event->result.event_type, n->aer_mask);
+            continue;
+        }
+
+        QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+
+        n->aer_mask |= 1 << event->result.event_type;
+        n->aer_mask_queued &= ~(1 << event->result.event_type);
+        n->outstanding_aers--;
+
+        req = n->aer_reqs[n->outstanding_aers];
+
+        result = (NvmeAerResult *) &req->cqe.result;
+        result->event_type = event->result.event_type;
+        result->event_info = event->result.event_info;
+        result->log_page = event->result.log_page;
+        g_free(event);
+
+        req->status = NVME_SUCCESS;
+
+        trace_nvme_aer_post_cqe(result->event_type, result->event_info,
+            result->log_page);
+
+        nvme_enqueue_req_completion(&n->admin_cq, req);
+    }
+}
+
 static void nvme_rw_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
@@ -796,6 +842,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         break;
     case NVME_TIMESTAMP:
         return nvme_get_feature_timestamp(n, cmd);
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        result = cpu_to_le32(n->features.async_config);
         break;
     default:
         trace_nvme_err_invalid_getfeat(dw10);
@@ -841,11 +889,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
             ((n->params.num_queues - 2) << 16));
         break;
-
     case NVME_TIMESTAMP:
         return nvme_set_feature_timestamp(n, cmd);
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        n->features.async_config = dw11;
         break;
-
     default:
         trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -854,6 +902,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_SUCCESS;
 }

+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    trace_nvme_aer(req->cqe.cid);
+
+    if (n->outstanding_aers > NVME_AERL) {
+        trace_nvme_aer_aerl_exceeded();
+        return NVME_AER_LIMIT_EXCEEDED;
+    }
+
+    n->aer_reqs[n->outstanding_aers] = req;
+    timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+    n->outstanding_aers++;
+
+    return NVME_NO_COMPLETE;
+}
+
 static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeSQueue *sq;
nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); + case NVME_ADM_CMD_ASYNC_EV_REQ: + return nvme_aer(n, cmd, req); case NVME_ADM_CMD_ABORT: return nvme_abort(n, cmd, req); default: @@ -963,6 +1029,7 @@ static void nvme_process_sq(void *opaque) static void nvme_clear_ctrl(NvmeCtrl *n) { + NvmeAsyncEvent *event; int i; blk_drain(n->conf.blk); @@ -978,8 +1045,19 @@ static void nvme_clear_ctrl(NvmeCtrl *n) } } + if (n->aer_timer) { + timer_del(n->aer_timer); + timer_free(n->aer_timer); + n->aer_timer = NULL; + } + while ((event = QSIMPLEQ_FIRST(&n->aer_queue)) != NULL) { + QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry); + g_free(event); + } + blk_flush(n->conf.blk); n->bar.cc = 0; + n->outstanding_aers = 0; } static int nvme_start_ctrl(NvmeCtrl *n) @@ -1074,6 +1152,9 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); + n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n); + QSIMPLEQ_INIT(&n->aer_queue); + return 0; } @@ -1405,6 +1486,7 @@ static void nvme_init_state(NvmeCtrl *n) n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1); } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1472,6 +1554,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ver = cpu_to_le32(0x00010201); id->oacs = cpu_to_le16(0); id->acl = 3; + id->aerl = NVME_AERL; id->frmw = 7 << 1; id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; @@ -1583,6 +1666,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_clear_ctrl(n); g_free(n->cq); g_free(n->sq); + g_free(n->aer_reqs); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index bea622ea71e0..48d26108eef1 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -86,18 +86,25 @@ typedef struct NvmeCtrl { uint32_t num_namespaces; uint32_t max_q_ents; uint64_t ns_size; + uint8_t outstanding_aers; uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; uint64_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + QEMUTimer *aer_timer; + uint8_t aer_mask; + uint8_t aer_mask_queued; + NvmeRequest **aer_reqs; + QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue; NvmeNamespace namespace; NvmeSQueue **sq; NvmeCQueue **cq; NvmeSQueue admin_sq; NvmeCQueue admin_cq; + NvmeFeatureVal features; NvmeIdCtrl id_ctrl; } NvmeCtrl; diff --git a/hw/block/trace-events b/hw/block/trace-events index 13d1b21dd4e4..5e3919752a90 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,13 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +nvme_process_aers(void) "processing aers" +nvme_aer(uint16_t cid) "cid %"PRIu16"" +nvme_aer_aerl_exceeded(void) "aerl exceeded" +nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 
0x%"PRIx8" lid 0x%"PRIx8"" +nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" From patchwork Fri Jul 5 07:23:27 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 11032109 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A75091510 for ; Fri, 5 Jul 2019 07:28:59 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 934FE28A28 for ; Fri, 5 Jul 2019 07:28:59 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8494728A2E; Fri, 5 Jul 2019 07:28:59 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 999D528A28 for ; Fri, 5 Jul 2019 07:28:58 +0000 (UTC) Received: from localhost ([::1]:50126 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIeP-0003eQ-VP for patchwork-qemu-devel@patchwork.kernel.org; Fri, 05 Jul 2019 03:28:57 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:44207) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIZr-0005R7-Ao for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1hjIZn-0005tf-5y for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:15 -0400 Received: from charlie.dont.surf ([128.199.63.193]:50538) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1hjIZW-0005hX-Ck; Fri, 05 Jul 2019 03:23:54 -0400 Received: from localhost.localdomain (ip-5-186-120-196.cgn.fibianet.dk [5.186.120.196]) by charlie.dont.surf (Postfix) with ESMTPSA id 5F8A5C0637; Fri, 5 Jul 2019 07:23:53 +0000 (UTC) From: Klaus Birkelund Jensen To: qemu-block@nongnu.org Date: Fri, 5 Jul 2019 09:23:27 +0200 Message-Id: <20190705072333.17171-11-klaus@birkelund.eu> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu> References: <20190705072333.17171-1-klaus@birkelund.eu> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 128.199.63.193 Subject: [Qemu-devel] [PATCH 10/16] nvme: support Get Log Page command X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, matt.fitzpatrick@oakgatetech.com, qemu-devel@nongnu.org, armbru@redhat.com, keith.busch@intel.com, mreitz@redhat.com, lersek@redhat.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using 
ClamSMTP Add support for the Get Log Page command and stub/dumb implementations of the mandatory Error Information, SMART/Health Information and Firmware Slot Information log pages. Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.10 ("Get Log Page command"). Signed-off-by: Klaus Birkelund Jensen --- hw/block/nvme.c | 209 ++++++++++++++++++++++++++++++++++++++++++ hw/block/nvme.h | 3 + hw/block/trace-events | 3 + include/block/nvme.h | 4 +- 4 files changed, 217 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a20576654f1b..93f5dff197e0 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -39,6 +39,8 @@ #include "nvme.h" #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE +#define NVME_TEMPERATURE 0x143 +#define NVME_ELPE 3 #define NVME_AERL 3 #define NVME_OP_ABORTED 0xff #define NVME_GUEST_ERR(trace, fmt, ...) \ @@ -319,6 +321,36 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, + uint8_t event_info, uint8_t log_page) +{ + NvmeAsyncEvent *event; + + trace_nvme_enqueue_event(event_type, event_info, log_page); + + /* + * Do not enqueue the event if something of this type is already queued. + * This bounds the size of the event queue and makes sure it does not grow + * indefinitely when events are not processed by the host (i.e. does not + * issue any AERs). + */ + if (n->aer_mask_queued & (1 << event_type)) { + return; + } + n->aer_mask_queued |= (1 << event_type); + + event = g_new(NvmeAsyncEvent, 1); + event->result = (NvmeAerResult) { + .event_type = event_type, + .event_info = event_info, + .log_page = log_page, + }; + + QSIMPLEQ_INSERT_TAIL(&n->aer_queue, event, entry); + + timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + static void nvme_process_aers(void *opaque) { NvmeCtrl *n = opaque; @@ -831,6 +863,10 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t result; switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + result = cpu_to_le32(n->features.temp_thresh); + break; + case NVME_ERROR_RECOVERY: case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); trace_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); @@ -878,6 +914,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw11 = le32_to_cpu(cmd->cdw11); switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + n->features.temp_thresh = dw11; + if (n->features.temp_thresh <= n->temperature) { + nvme_enqueue_event(n, NVME_AER_TYPE_SMART, + NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO); + } + break; case NVME_VOLATILE_WRITE_CACHE: blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; @@ -902,6 +945,137 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) +{ + n->aer_mask &= ~(1 << event_type); + if (!QSIMPLEQ_EMPTY(&n->aer_queue)) { + timer_mod(n->aer_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, + uint32_t buf_len, uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(*n->elpes) * (NVME_ELPE + 1) - off, buf_len); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_ERROR); + } + + return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, + uint32_t buf_len, uint64_t off, NvmeRequest *req) +{ + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + uint32_t trans_len; + time_t current_ms; + NvmeSmartLog smart; + + if (cmd->nsid != 0 && cmd->nsid != 0xffffffff) { + trace_nvme_err(req->cqe.cid, "smart log not supported for namespace", + NVME_INVALID_FIELD | NVME_DNR); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (off > sizeof(smart)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(smart) - off, buf_len); + + memset(&smart, 0x0, sizeof(smart)); + smart.number_of_error_log_entries[0] = cpu_to_le64(0); + smart.temperature[0] = n->temperature & 0xff; + smart.temperature[1] = (n->temperature >> 8) & 0xff; + + if (n->features.temp_thresh <= n->temperature) { + smart.critical_warning |= NVME_SMART_TEMPERATURE; + } + + current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + smart.power_on_hours[0] = cpu_to_le64( + (((current_ms - n->starttime_ms) / 1000) / 60) / 60); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + NvmeFwSlotInfoLog fw_log; + + if (off > sizeof(fw_log)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + memset(&fw_log, 0, sizeof(NvmeFwSlotInfoLog)); + + trans_len = MIN(sizeof(fw_log) - off, buf_len); + + return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw12 = le32_to_cpu(cmd->cdw12); + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint16_t lid = dw10 & 0xff; + uint8_t rae = (dw10 >> 15) & 0x1; + uint32_t numdl, numdu, len; + uint64_t off, lpol, lpou; + + numdl = (dw10 
>> 16); + numdu = (dw11 & 0xffff); + lpol = dw12; + lpou = dw13; + + len = (((numdu << 16) | numdl) + 1) << 2; + off = (lpou << 32ULL) | lpol; + + if (off & 0x3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_nvme_get_log(req->cqe.cid, lid); + + switch (lid) { + case NVME_LOG_ERROR_INFO: + return nvme_error_log_info(n, cmd, rae, len, off, req); + case NVME_LOG_SMART_INFO: + return nvme_smart_info(n, cmd, rae, len, off, req); + case NVME_LOG_FW_SLOT_INFO: + return nvme_fw_log_info(n, cmd, len, off, req); + default: + trace_nvme_err_invalid_log_page(req->cqe.cid, lid); + return NVME_INVALID_LOG_ID | NVME_DNR; + } +} + static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { trace_nvme_aer(req->cqe.cid); @@ -982,6 +1156,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); + case NVME_ADM_CMD_GET_LOG_PAGE: + return nvme_get_log(n, cmd, req); case NVME_ADM_CMD_ASYNC_EV_REQ: return nvme_aer(n, cmd, req); case NVME_ADM_CMD_ABORT: @@ -1347,6 +1523,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "completion queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1357,6 +1540,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_head=%"PRIu16", ignoring", qid, new_head); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1385,6 +1574,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "submission queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1395,6 +1591,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_tail=%"PRIu16", ignoring", qid, new_tail); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1484,9 +1686,13 @@ static void nvme_init_state(NvmeCtrl *n) { n->num_namespaces = 1; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); + n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->elpes = g_new0(NvmeErrorLog, NVME_ELPE + 1); n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1); + n->temperature = NVME_TEMPERATURE; + n->features.temp_thresh = 0x14d; } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1556,6 +1762,8 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->acl = 3; id->aerl = NVME_AERL; id->frmw = 7 << 1; + id->lpa = 1 << 2; + id->elpe = NVME_ELPE; id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); @@ -1666,6 +1874,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_clear_ctrl(n); g_free(n->cq); g_free(n->sq); + g_free(n->elpes); g_free(n->aer_reqs); if (n->params.cmb_size_mb) { diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 48d26108eef1..ed3fa3faa718 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h 
@@ -77,6 +77,8 @@ typedef struct NvmeCtrl { BlockConf conf; NvmeParams params; + uint64_t starttime_ms; + uint16_t temperature; uint32_t page_size; uint16_t page_bits; uint16_t max_prp_ents; @@ -99,6 +101,7 @@ typedef struct NvmeCtrl { NvmeRequest **aer_reqs; QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue; + NvmeErrorLog *elpes; NvmeNamespace namespace; NvmeSQueue **sq; NvmeCQueue **cq; diff --git a/hw/block/trace-events b/hw/block/trace-events index 5e3919752a90..ed666bbc94f2 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,7 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +nvme_get_log(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" nvme_process_aers(void) "processing aers" nvme_aer(uint16_t cid) "cid %"PRIu16"" nvme_aer_aerl_exceeded(void) "aerl exceeded" @@ -67,6 +68,7 @@ nvme_mmio_shutdown_set(void) "shutdown bit set" nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions +nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16"" nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" @@ -92,6 +94,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion q nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" diff --git a/include/block/nvme.h b/include/block/nvme.h index 1b0accd4fe2b..5a10b8b67468 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest { NVME_AER_TYPE_SMART = 1, NVME_AER_TYPE_IO_SPECIFIC = 6, NVME_AER_TYPE_VENDOR_SPECIFIC = 7, - NVME_AER_INFO_ERR_INVALID_SQ = 0, - NVME_AER_INFO_ERR_INVALID_DB = 1, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0, + NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1, NVME_AER_INFO_ERR_DIAG_FAIL = 2, NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3, NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4, From patchwork Fri Jul 5 07:23:28 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 11032115 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E284013A4 for ; Fri, 5 Jul 2019 07:31:22 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D2789289F2 for ; Fri, 5 Jul 2019 07:31:22 +0000 (UTC) Received: by 
mail.wl.linuxfoundation.org (Postfix, from userid 486) id C65E228A2A; Fri, 5 Jul 2019 07:31:22 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 4CF7D289F2 for ; Fri, 5 Jul 2019 07:31:22 +0000 (UTC) Received: from localhost ([::1]:50150 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIgj-0006P9-IO for patchwork-qemu-devel@patchwork.kernel.org; Fri, 05 Jul 2019 03:31:21 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:44321) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIZy-0005ZM-E2 for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:26 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1hjIZw-00060I-Sz for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:22 -0400 Received: from charlie.dont.surf ([128.199.63.193]:50542) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1hjIZr-0005he-MG; Fri, 05 Jul 2019 03:24:16 -0400 Received: from localhost.localdomain (ip-5-186-120-196.cgn.fibianet.dk [5.186.120.196]) by charlie.dont.surf (Postfix) with ESMTPSA id 99C8DC063B; Fri, 5 Jul 2019 07:23:53 +0000 (UTC) From: Klaus Birkelund Jensen To: qemu-block@nongnu.org Date: Fri, 5 Jul 2019 09:23:28 +0200 Message-Id: <20190705072333.17171-12-klaus@birkelund.eu> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu> References: <20190705072333.17171-1-klaus@birkelund.eu> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 128.199.63.193 Subject: [Qemu-devel] [PATCH 11/16] nvme: add missing mandatory Features X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, matt.fitzpatrick@oakgatetech.com, qemu-devel@nongnu.org, armbru@redhat.com, keith.busch@intel.com, mreitz@redhat.com, lersek@redhat.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP Add support for returning a reasonable response to Get/Set Features for mandatory features.
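For reference, the Interrupt Vector Configuration feature value used below packs the interrupt vector into bits 15:0 of the feature dword and the Coalescing Disable (CD) flag into bit 16, which is why nvme_init_state() stores i | (1 << 16) for every vector when coalescing is unsupported. A minimal sketch of that encoding (the helper name is illustrative, not part of the patch):

static inline uint32_t nvme_ivc_pack(uint16_t iv, bool cd)
{
    /* IV in bits 15:0, CD (coalescing disable) in bit 16 */
    return (uint32_t)iv | ((uint32_t)cd << 16);
}

For example, vector 3 with coalescing disabled encodes as nvme_ivc_pack(3, true) == 0x10003, matching the values the initialization loop writes into n->features.int_vector_config.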
Signed-off-by: Klaus Birkelund Jensen --- hw/block/nvme.c | 49 ++++++++++++++++++++++++++++++++++++++++--- hw/block/trace-events | 2 ++ include/block/nvme.h | 3 ++- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 93f5dff197e0..8259dd7c1d6c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -860,13 +860,24 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); uint32_t result; + trace_nvme_getfeat(dw10); + switch (dw10) { + case NVME_ARBITRATION: + result = cpu_to_le32(n->features.arbitration); + break; + case NVME_POWER_MANAGEMENT: + result = cpu_to_le32(n->features.power_mgmt); + break; case NVME_TEMPERATURE_THRESHOLD: result = cpu_to_le32(n->features.temp_thresh); break; case NVME_ERROR_RECOVERY: + result = cpu_to_le32(n->features.err_rec); + break; case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); @@ -878,6 +889,19 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); + case NVME_INTERRUPT_COALESCING: + result = cpu_to_le32(n->features.int_coalescing); + break; + case NVME_INTERRUPT_VECTOR_CONF: + if ((dw11 & 0xffff) > n->params.num_queues) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]); + break; + case NVME_WRITE_ATOMICITY: + result = cpu_to_le32(n->features.write_atomicity); + break; case NVME_ASYNCHRONOUS_EVENT_CONF: result = cpu_to_le32(n->features.async_config); break; @@ -913,6 +937,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw10 = le32_to_cpu(cmd->cdw10); uint32_t dw11 = le32_to_cpu(cmd->cdw11); + trace_nvme_setfeat(dw10, dw11); + switch (dw10) { case NVME_TEMPERATURE_THRESHOLD: n->features.temp_thresh = dw11; @@ -937,6 +963,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; break; + case NVME_ARBITRATION: + case NVME_POWER_MANAGEMENT: + case NVME_ERROR_RECOVERY: + case NVME_INTERRUPT_COALESCING: + case NVME_INTERRUPT_VECTOR_CONF: + case NVME_WRITE_ATOMICITY: + return NVME_FEAT_NOT_CHANGABLE | NVME_DNR; default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -1693,6 +1726,14 @@ static void nvme_init_state(NvmeCtrl *n) n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1); n->temperature = NVME_TEMPERATURE; n->features.temp_thresh = 0x14d; + n->features.int_vector_config = g_malloc0_n(n->params.num_queues, + sizeof(*n->features.int_vector_config)); + + /* disable coalescing (not supported) */ + for (int i = 0; i < n->params.num_queues; i++) { + n->features.int_vector_config[i] = i | (1 << 16); + } + } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1769,6 +1810,10 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); + if (blk_enable_write_cache(n->conf.blk)) { + id->vwc = 1; + } + strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:"); qemu_uuid_unparse(&qemu_uuid, (char *) id->subnqn + strlen((char *) id->subnqn)); @@ -1776,9 +1821,6 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->psd[0].mp = 
cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - if (blk_enable_write_cache(n->conf.blk)) { - id->vwc = 1; - } n->bar.cap = 0; NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); @@ -1876,6 +1918,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->sq); g_free(n->elpes); g_free(n->aer_reqs); + g_free(n->features.int_vector_config); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/trace-events b/hw/block/trace-events index ed666bbc94f2..17485bb0375b 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -41,6 +41,8 @@ nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" nvme_identify_ctrl(void) "identify controller" nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32"" +nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" diff --git a/include/block/nvme.h b/include/block/nvme.h index 5a10b8b67468..d24b1f28e0fc 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -445,7 +445,8 @@ enum NvmeStatusCodes { NVME_FW_REQ_RESET = 0x010b, NVME_INVALID_QUEUE_DEL = 0x010c, NVME_FID_NOT_SAVEABLE = 0x010d, - NVME_FID_NOT_NSID_SPEC = 0x010f, + NVME_FEAT_NOT_CHANGABLE = 0x010e, + NVME_FEAT_NOT_NSID_SPEC = 0x010f, NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, From patchwork Fri Jul 5 07:23:29 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 11032091 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 085241510 for ; Fri, 5 Jul 2019 07:26:27 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id E6FB928A28 for ; Fri, 5 Jul 2019 07:26:26 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id DAC8F28A2E; Fri, 5 Jul 2019 07:26:26 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 2AE7E28A28 for ; Fri, 5 Jul 2019 07:26:26 +0000 (UTC) Received: from localhost ([::1]:50102 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIbx-0000QK-HY for patchwork-qemu-devel@patchwork.kernel.org; Fri, 05 Jul 2019 03:26:25 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:44352) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIa1-0005aE-2D for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:28 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1hjIZy-000622-VE for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:25 
-0400 Received: from charlie.dont.surf ([128.199.63.193]:50544) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1hjIZr-0005hm-N3; Fri, 05 Jul 2019 03:24:16 -0400 Received: from localhost.localdomain (ip-5-186-120-196.cgn.fibianet.dk [5.186.120.196]) by charlie.dont.surf (Postfix) with ESMTPSA id CE5ACC063F; Fri, 5 Jul 2019 07:23:53 +0000 (UTC) From: Klaus Birkelund Jensen To: qemu-block@nongnu.org Date: Fri, 5 Jul 2019 09:23:29 +0200 Message-Id: <20190705072333.17171-13-klaus@birkelund.eu> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu> References: <20190705072333.17171-1-klaus@birkelund.eu> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 128.199.63.193 Subject: [Qemu-devel] [PATCH 12/16] nvme: bump supported NVMe revision to 1.3d X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, matt.fitzpatrick@oakgatetech.com, qemu-devel@nongnu.org, armbru@redhat.com, keith.busch@intel.com, mreitz@redhat.com, lersek@redhat.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP Add the new Namespace Identification Descriptor List (CNS 03h) and track creation of queues to enable the controller to return Command Sequence Error if Set Features is called for Number of Queues after any queues have been created. Signed-off-by: Klaus Birkelund Jensen --- hw/block/nvme.c | 84 ++++++++++++++++++++++++++++++++++++------- hw/block/nvme.h | 1 + hw/block/trace-events | 4 ++- include/block/nvme.h | 30 +++++++++++++--- 4 files changed, 102 insertions(+), 17 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 8259dd7c1d6c..8ad95fdfa261 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,20 +9,22 @@ */ /** - * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e + * Reference Specs: http://www.nvmexpress.org, 1.3d, 1.2, 1.1, 1.0e * * http://www.nvmexpress.org/resources/ */ /** * Usage: add options: - * -drive file=<file>,if=none,id=<drive_id> - * -device nvme,drive=<drive_id>,serial=<serial>,id=<id>, \ - * cmb_size_mb=<cmb_size_mb>, \ - * num_queues=<N[optional]> + * -drive file=<file>,if=none,id=<drive_id> + * -device nvme,drive=<drive_id>,serial=<serial>,id=<id> * - * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * Advanced optional options: + * + * num_queues=<uint32> : Maximum number of IO Queues. + * Default: 64 + * cmb_size_mb=<uint32> : Size of Controller Memory Buffer in MBs. + * Default: 0 (disabled) */ #include "qemu/osdep.h" @@ -43,6 +45,7 @@ #define NVME_ELPE 3 #define NVME_AERL 3 #define NVME_OP_ABORTED 0xff + #define NVME_GUEST_ERR(trace, fmt, ...)
\ do { \ (trace_##trace)(__VA_ARGS__); \ @@ -316,6 +319,8 @@ static void nvme_post_cqes(void *opaque) static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) { assert(cq->cqid == req->sq->cqid); + + trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid); QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); @@ -534,6 +539,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) if (sq->sqid) { g_free(sq); } + n->qs_created--; } static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -600,6 +606,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, cq = n->cq[cqid]; QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); n->sq[sqid] = sq; + n->qs_created++; } static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -649,6 +656,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) if (cq->cqid) { g_free(cq); } + n->qs_created--; } static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) @@ -689,6 +697,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, msix_vector_use(&n->parent_obj, cq->vector); n->cq[cqid] = cq; cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); + n->qs_created++; } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) @@ -762,7 +771,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) prp1, prp2); } -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) { static const int data_len = 4 * KiB; uint32_t min_nsid = le32_to_cpu(c->nsid); @@ -772,7 +781,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; - trace_nvme_identify_nslist(min_nsid); + trace_nvme_identify_ns_list(min_nsid); list = g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { @@ -789,6 +798,47 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) return ret; } +static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c) +{ + static const int data_len = 4 * KiB; + + /* + * The device model does not have anywhere to store a persistent UUID, so + * conjure up something that is reproducible. We generate an UUID of the + * form "00000000-0000-0000-0000-<nsid>", where nsid is similar to, say, + * 000000000001.
+ */ + struct ns_descr { + uint8_t nidt; + uint8_t nidl; + uint8_t rsvd[14]; + uint32_t nid; + }; + + uint32_t nsid = le32_to_cpu(c->nsid); + uint64_t prp1 = le64_to_cpu(c->prp1); + uint64_t prp2 = le64_to_cpu(c->prp2); + + struct ns_descr *list; + uint16_t ret; + + trace_nvme_identify_ns_descriptor_list(nsid); + + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + return NVME_INVALID_NSID | NVME_DNR; + } + + list = g_malloc0(data_len); + list->nidt = 0x3; + list->nidl = 0x10; + list->nid = cpu_to_be32(nsid); + + ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2); + g_free(list); + return ret; +} + static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) { NvmeIdentify *c = (NvmeIdentify *)cmd; @@ -799,7 +849,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) case 0x01: return nvme_identify_ctrl(n, c); case 0x02: - return nvme_identify_nslist(n, c); + return nvme_identify_ns_list(n, c); + case 0x03: + return nvme_identify_ns_descriptor_list(n, cmd); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -951,6 +1003,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: + if (n->qs_created > 2) { + return NVME_CMD_SEQ_ERROR | NVME_DNR; + } + + if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, n->params.num_queues - 1, @@ -1798,7 +1858,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; - id->ver = cpu_to_le32(0x00010201); + id->ver = cpu_to_le32(0x00010300); id->oacs = cpu_to_le16(0); id->acl = 3; id->aerl = NVME_AERL; @@ -1829,7 +1889,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010201; + n->bar.vs = 0x00010300; n->bar.intmc = n->bar.intms = 0; } diff --git a/hw/block/nvme.h b/hw/block/nvme.h index ed3fa3faa718..a502a3dbbbfd 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -95,6 +95,7 @@ typedef struct NvmeCtrl { uint64_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint32_t qs_created; QEMUTimer *aer_timer; uint8_t aer_mask; uint8_t aer_mask_queued; diff --git a/hw/block/trace-events b/hw/block/trace-events index 17485bb0375b..66f6c2c07d20 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -40,7 +40,8 @@ nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" nvme_identify_ctrl(void) "identify controller" nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" -nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_identify_ns_list(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_identify_ns_descriptor_list(uint16_t ns) "identify namespace descriptor list, nsid=%"PRIu16"" nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32"" nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" @@ -54,6 +55,7 @@ nvme_aer(uint16_t cid) "cid %"PRIu16"" nvme_aer_aerl_exceeded(void) "aerl exceeded" nvme_aer_masked(uint8_t type, uint8_t mask) "type 
0x%"PRIx8" mask 0x%"PRIx8"" nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid) "cid %"PRIu16" cqid %"PRIu16"" nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index d24b1f28e0fc..30f1d8b00fc5 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -550,7 +550,9 @@ typedef struct NvmeIdCtrl { uint32_t rtd3e; uint32_t oaes; uint32_t ctratt; - uint8_t rsvd255[156]; + uint8_t rsvd111[12]; + uint8_t fguid[16]; + uint8_t rsvd255[128]; uint16_t oacs; uint8_t acl; uint8_t aerl; @@ -568,9 +570,15 @@ typedef struct NvmeIdCtrl { uint8_t tnvmcap[16]; uint8_t unvmcap[16]; uint32_t rpmbs; - uint8_t rsvd319[4]; + uint16_t edstt; + uint8_t dsto; + uint8_t fwug; uint16_t kas; - uint8_t rsvd511[190]; + uint16_t hctma; + uint16_t mntmt; + uint16_t mxtmt; + uint32_t sanicap; + uint8_t rsvd511[180]; uint8_t sqes; uint8_t cqes; uint16_t maxcmd; @@ -678,7 +686,21 @@ typedef struct NvmeIdNs { uint8_t mc; uint8_t dpc; uint8_t dps; - uint8_t res30[98]; + uint8_t nmic; + uint8_t rescap; + uint8_t fpi; + uint8_t dlfeat; + uint16_t nawun; + uint16_t nawupf; + uint16_t nacwu; + uint16_t nabsn; + uint16_t nabo; + uint16_t nabspf; + uint16_t noiob; + uint8_t nvmcap[16]; + uint8_t resv103[40]; + uint8_t nguid[16]; + uint64_t eui64; NvmeLBAF lbaf[16]; uint8_t res192[192]; uint8_t vs[3712]; From patchwork Fri Jul 5 07:23:30 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Klaus Jensen X-Patchwork-Id: 11032127 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DB7D9138D for ; Fri, 5 Jul 2019 07:38:59 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B3AA328A09 for ; Fri, 5 Jul 2019 07:38:59 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8EEC828A2E; Fri, 5 Jul 2019 07:38:59 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 4DB1C28A09 for ; Fri, 5 Jul 2019 07:38:58 +0000 (UTC) Received: from localhost ([::1]:50240 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIo5-0005Bw-M6 for patchwork-qemu-devel@patchwork.kernel.org; Fri, 05 Jul 2019 03:38:57 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:44405) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hjIaA-0005g2-E2 for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:40 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1hjIa4-00066d-Pp for qemu-devel@nongnu.org; Fri, 05 Jul 2019 03:24:32 -0400 Received: from charlie.dont.surf 
([128.199.63.193]:50546) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1hjIZr-0005iS-NW; Fri, 05 Jul 2019 03:24:16 -0400 Received: from localhost.localdomain (ip-5-186-120-196.cgn.fibianet.dk [5.186.120.196]) by charlie.dont.surf (Postfix) with ESMTPSA id 14D02C0647; Fri, 5 Jul 2019 07:23:54 +0000 (UTC) From: Klaus Birkelund Jensen To: qemu-block@nongnu.org Date: Fri, 5 Jul 2019 09:23:30 +0200 Message-Id: <20190705072333.17171-14-klaus@birkelund.eu> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu> References: <20190705072333.17171-1-klaus@birkelund.eu> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 128.199.63.193 Subject: [Qemu-devel] [PATCH 13/16] nvme: simplify dma/cmb mappings X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, matt.fitzpatrick@oakgatetech.com, qemu-devel@nongnu.org, armbru@redhat.com, keith.busch@intel.com, mreitz@redhat.com, lersek@redhat.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP Instead of handling both QSGs and IOVs in multiple places, simply use QSGs everywhere by assuming that the request does not involve the controller memory buffer (CMB). If the request is found to involve the CMB, convert the QSG to an IOV and issue the I/O. The QSG is converted to an IOV by the dma helpers anyway, so the CMB path is not unfairly affected by this simplifying change. As a side-effect, this patch also allows PRPs to be located in the CMB. The logic ensures that if some of the PRP is in the CMB, all of it must be located there. 
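Conceptually, the QSG-to-IOV conversion is a per-entry address rebase: each scatter/gather entry is a bus address that is already known to fall inside the CMB, so it maps to a host pointer by subtracting the CMB base address. A condensed restatement of the dma_to_cmb() helper this patch introduces (it assumes every entry has been validated to lie within the CMB, which nvme_map_prp() guarantees):

static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov)
{
    for (int i = 0; i < qsg->nsg; i++) {
        /* rebase the bus address onto the CMB backing buffer */
        void *addr = &n->cmbuf[qsg->sg[i].base - n->ctrl_mem.addr];
        qemu_iovec_add(iov, addr, qsg->sg[i].len);
    }
}

Callers are expected to qemu_iovec_init() the vector with qsg->nsg slots beforehand, as nvme_dma_read_prp(), nvme_dma_write_prp() and nvme_rw() do in the diff below.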
Signed-off-by: Klaus Birkelund Jensen --- hw/block/nvme.c | 277 ++++++++++++++++++++++++++++-------------- hw/block/nvme.h | 3 +- hw/block/trace-events | 1 + include/block/nvme.h | 1 + 4 files changed, 187 insertions(+), 95 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 8ad95fdfa261..02888dbfdbc1 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -55,14 +55,21 @@ static void nvme_process_sq(void *opaque); +static inline uint8_t nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) +{ + return n->cmbsz && addr >= n->ctrl_mem.addr && + addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)); +} + static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { - if (n->cmbsz && addr >= n->ctrl_mem.addr && - addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { + if (nvme_addr_is_cmb(n, addr)) { memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); - } else { - pci_dma_read(&n->parent_obj, addr, buf, size); + + return; } + + pci_dma_read(&n->parent_obj, addr, buf, size); } static void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) @@ -151,139 +158,200 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) } } -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, - uint64_t prp2, uint32_t len, NvmeCtrl *n) +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1, + uint64_t prp2, uint32_t len, NvmeRequest *req) { hwaddr trans_len = n->page_size - (prp1 % n->page_size); trans_len = MIN(len, trans_len); int num_prps = (len >> n->page_bits) + 1; + uint16_t status = NVME_SUCCESS; + bool prp_list_in_cmb = false; + + trace_nvme_map_prp(req->cmd.opcode, trans_len, len, prp1, prp2, num_prps); if (unlikely(!prp1)) { trace_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; - } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && - prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { - qsg->nsg = 0; - qemu_iovec_init(iov, num_prps); - qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); - } else { - pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); - qemu_sglist_add(qsg, prp1, trans_len); } + + if (nvme_addr_is_cmb(n, prp1)) { + req->is_cmb = true; + } + + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); + qemu_sglist_add(qsg, prp1, trans_len); + len -= trans_len; if (len) { if (unlikely(!prp2)) { trace_nvme_err_invalid_prp2_missing(); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } + if (len > n->page_size) { uint64_t prp_list[n->max_prp_ents]; uint32_t nents, prp_trans; int i = 0; + if (nvme_addr_is_cmb(n, prp2)) { + prp_list_in_cmb = true; + } + nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); + nvme_addr_read(n, prp2, (void *) prp_list, prp_trans); while (len != 0) { + bool addr_is_cmb; uint64_t prp_ent = le64_to_cpu(prp_list[i]); if (i == n->max_prp_ents - 1 && len > n->page_size) { if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { trace_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_FIELD | NVME_DNR; + goto unmap; + } + + addr_is_cmb = nvme_addr_is_cmb(n, prp_ent); + if ((prp_list_in_cmb && !addr_is_cmb) || + (!prp_list_in_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; goto unmap; } i = 0; nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - nvme_addr_read(n, prp_ent, (void *)prp_list, - prp_trans); + nvme_addr_read(n, 
prp_ent, (void *) prp_list, prp_trans); prp_ent = le64_to_cpu(prp_list[i]); } if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { trace_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } - trans_len = MIN(len, n->page_size); - if (qsg->nsg){ - qemu_sglist_add(qsg, prp_ent, trans_len); - } else { - qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len); + addr_is_cmb = nvme_addr_is_cmb(n, prp_ent); + if ((req->is_cmb && !addr_is_cmb) || + (!req->is_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; } + + trans_len = MIN(len, n->page_size); + qemu_sglist_add(qsg, prp_ent, trans_len); + len -= trans_len; i++; } } else { + bool addr_is_cmb = nvme_addr_is_cmb(n, prp2); + if ((req->is_cmb && !addr_is_cmb) || + (!req->is_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; + } + if (unlikely(prp2 & (n->page_size - 1))) { trace_nvme_err_invalid_prp2_align(prp2); + status = NVME_INVALID_FIELD | NVME_DNR; goto unmap; } - if (qsg->nsg) { - qemu_sglist_add(qsg, prp2, len); - } else { - qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len); - } + + qemu_sglist_add(qsg, prp2, len); } } + return NVME_SUCCESS; - unmap: +unmap: qemu_sglist_destroy(qsg); - return NVME_INVALID_FIELD | NVME_DNR; + + return status; +} + +static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov) +{ + for (int i = 0; i < qsg->nsg; i++) { + void *addr = &n->cmbuf[qsg->sg[i].base - n->ctrl_mem.addr]; + qemu_iovec_add(iov, addr, qsg->sg[i].len); + } } static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - uint64_t prp1, uint64_t prp2) + uint64_t prp1, uint64_t prp2, NvmeRequest *req) { QEMUSGList qsg; - QEMUIOVector iov; - uint16_t status = NVME_SUCCESS; + uint16_t err = NVME_SUCCESS; - if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { - return NVME_INVALID_FIELD | NVME_DNR; + err = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + if (err) { + return err; } - if (qsg.nsg > 0) { - if (dma_buf_write(ptr, len, &qsg)) { - status = NVME_INVALID_FIELD | NVME_DNR; - } - qemu_sglist_destroy(&qsg); - } else { - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { - status = NVME_INVALID_FIELD | NVME_DNR; + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; } + qemu_iovec_destroy(&iov); + + return err; } - return status; + + if (unlikely(dma_buf_write(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_sglist_destroy(&qsg); + + return err; } static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - uint64_t prp1, uint64_t prp2) + uint64_t prp1, uint64_t prp2, NvmeRequest *req) { QEMUSGList qsg; - QEMUIOVector iov; - uint16_t status = NVME_SUCCESS; - - trace_nvme_dma_read(prp1, prp2); + uint16_t err = NVME_SUCCESS; - if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { - return NVME_INVALID_FIELD | NVME_DNR; + err = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + if (err) { + return err; } - if (qsg.nsg > 0) { - if (unlikely(dma_buf_read(ptr, len, &qsg))) { - trace_nvme_err_invalid_dma(); - status = NVME_INVALID_FIELD | NVME_DNR; - } - qemu_sglist_destroy(&qsg); - } else { + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + if 
(unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); - status = NVME_INVALID_FIELD | NVME_DNR; + err = NVME_INVALID_FIELD | NVME_DNR; } + qemu_iovec_destroy(&iov); + + goto out; } - return status; + + if (unlikely(dma_buf_read(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + +out: + qemu_sglist_destroy(&qsg); + + return err; } static void nvme_post_cqes(void *opaque) @@ -415,16 +483,20 @@ static void nvme_rw_cb(void *opaque, int ret) block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); req->status = NVME_INTERNAL_DEV_ERROR; } - if (req->has_sg) { + + if (req->qsg.nalloc) { qemu_sglist_destroy(&req->qsg); } + if (req->iov.nalloc) { + qemu_iovec_destroy(&req->iov); + } + nvme_enqueue_req_completion(cq, req); } static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { - req->has_sg = false; block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, BLOCK_ACCT_FLUSH); req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); @@ -448,7 +520,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - req->has_sg = false; block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, BLOCK_ACCT_WRITE); req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count, @@ -480,21 +551,21 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) { + if (nvme_map_prp(n, &req->qsg, prp1, prp2, data_size, req)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_INVALID_FIELD | NVME_DNR; } dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - if (req->qsg.nsg > 0) { - req->has_sg = true; + if (!req->is_cmb) { req->aiocb = is_write ? dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req) : dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req); } else { - req->has_sg = false; + qemu_iovec_init(&req->iov, req->qsg.nsg); + dma_to_cmb(n, &req->qsg, &req->iov); req->aiocb = is_write ? 
blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, req) : @@ -592,7 +663,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, sq->size = size; sq->cqid = cqid; sq->head = sq->tail = 0; - sq->io_req = g_new(NvmeRequest, sq->size); + sq->io_req = g_new0(NvmeRequest, sq->size); QTAILQ_INIT(&sq->req_list); QTAILQ_INIT(&sq->out_req_list); @@ -740,7 +811,8 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); @@ -748,10 +820,11 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) trace_nvme_identify_ctrl(); return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), - prp1, prp2); + prp1, prp2, req); } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(c->nsid); @@ -768,10 +841,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) ns = &n->namespace; return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), - prp1, prp2); + prp1, prp2, req); } -static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, + NvmeRequest *req) { static const int data_len = 4 * KiB; uint32_t min_nsid = le32_to_cpu(c->nsid); @@ -793,12 +867,13 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) break; } } - ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2); + ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2, req); g_free(list); return ret; } -static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c) +static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c, + NvmeRequest *req) { static const int data_len = 4 * KiB; @@ -834,24 +909,24 @@ static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c) list->nidl = 0x10; list->nid = cpu_to_be32(nsid); - ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2); + ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2, req); g_free(list); return ret; } -static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeIdentify *c = (NvmeIdentify *)cmd; switch (le32_to_cpu(c->cns)) { case 0x00: - return nvme_identify_ns(n, c); + return nvme_identify_ns(n, c, req); case 0x01: - return nvme_identify_ctrl(n, c); + return nvme_identify_ctrl(n, c, req); case 0x02: - return nvme_identify_ns_list(n, c); + return nvme_identify_ns_list(n, c, req); case 0x03: - return nvme_identify_ns_descriptor_list(n, cmd); + return nvme_identify_ns_descriptor_list(n, cmd, req); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -898,15 +973,16 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) return cpu_to_le64(ts.all); } -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, + NvmeRequest *req) { uint64_t prp1 = le64_to_cpu(cmd->prp1); uint64_t prp2 = le64_to_cpu(cmd->prp2); uint64_t timestamp = nvme_get_timestamp(n); - return nvme_dma_read_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp),
prp1, prp2); + return nvme_dma_read_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), + prp1, prp2, req); } static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ -940,7 +1016,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: - return nvme_get_feature_timestamp(n, cmd); + return nvme_get_feature_timestamp(n, cmd, req); case NVME_INTERRUPT_COALESCING: result = cpu_to_le32(n->features.int_coalescing); break; @@ -966,7 +1042,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, + NvmeRequest *req) { uint16_t ret; uint64_t timestamp; @@ -974,7 +1051,7 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) uint64_t prp2 = le64_to_cpu(cmd->prp2); ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp), prp1, prp2); + sizeof(timestamp), prp1, prp2, req); if (ret != NVME_SUCCESS) { return ret; } @@ -1019,7 +1096,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) ((n->params.num_queues - 2) << 16)); break; case NVME_TIMESTAMP: - return nvme_set_feature_timestamp(n, cmd); + return nvme_set_feature_timestamp(n, cmd, req); case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; break; @@ -1065,7 +1142,7 @@ static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, } return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, - prp2); + prp2, req); } static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, @@ -1108,7 +1185,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, } return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, - prp2); + prp2, req); } static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, @@ -1128,9 +1205,10 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, trans_len = MIN(sizeof(fw_log) - off, buf_len); return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, - prp2); + prp2, req); } + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); @@ -1244,7 +1322,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ADM_CMD_CREATE_CQ: return nvme_create_cq(n, cmd); case NVME_ADM_CMD_IDENTIFY: - return nvme_identify(n, cmd); + return nvme_identify(n, cmd, req); case NVME_ADM_CMD_SET_FEATURES: return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: @@ -1261,6 +1339,17 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) } } +static void nvme_init_req(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + memset(&req->cqe, 0, sizeof(req->cqe)); + req->cqe.cid = le16_to_cpu(cmd->cid); + + memcpy(&req->cmd, cmd, sizeof(NvmeCmd)); + req->is_cmb = false; + + req->status = NVME_SUCCESS; +} + static void nvme_process_sq(void *opaque) { NvmeSQueue *sq = opaque; @@ -1284,8 +1373,8 @@ static void nvme_process_sq(void *opaque) req = QTAILQ_FIRST(&sq->req_list); QTAILQ_REMOVE(&sq->req_list, req, entry); QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); - memset(&req->cqe, 0, sizeof(req->cqe)); - req->cqe.cid = cmd.cid; + + nvme_init_req(n, &cmd, req); status = sq->sqid ?
nvme_io_cmd(n, &cmd, req) : nvme_admin_cmd(n, &cmd, req); @@ -1803,7 +1892,7 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index a502a3dbbbfd..7ec4cf7c3c13 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -23,11 +23,12 @@ typedef struct NvmeRequest { struct NvmeSQueue *sq; BlockAIOCB *aiocb; uint16_t status; - bool has_sg; + bool is_cmb; NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; QEMUIOVector iov; + NvmeCmd cmd; QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; diff --git a/hw/block/trace-events b/hw/block/trace-events index 66f6c2c07d20..9ea51a464c13 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -33,6 +33,7 @@ nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +nvme_map_prp(uint8_t cmd_opcode, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cmd_opcode=0x%"PRIx8", trans_len=%"PRIu64", len=%"PRIu32", prp1=0x%"PRIx64", prp2=0x%"PRIx64", num_prps=%d" nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" diff --git a/include/block/nvme.h b/include/block/nvme.h index 30f1d8b00fc5..a6ef8d8ff25a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -427,6 +427,7 @@ enum NvmeStatusCodes { NVME_CMD_ABORT_MISSING_FUSE = 0x000a, NVME_INVALID_NSID = 0x000b, NVME_CMD_SEQ_ERROR = 0x000c, + NVME_INVALID_USE_OF_CMB = 0x0012, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, NVME_NS_NOT_READY = 0x0082,

From patchwork Fri Jul 5 07:23:31 2019
X-Patchwork-Submitter: Klaus Jensen
X-Patchwork-Id: 11032121
From: Klaus Birkelund Jensen
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:31 +0200
Message-Id: <20190705072333.17171-15-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 14/16] nvme: support multiple block requests per request

Currently, the device only issues a single block backend request per NVMe request, but as we move towards supporting metadata (and discontiguous vector requests supported by OpenChannel 2.0) it will be required to issue multiple block backend requests per NVMe request. With this patch the NVMe device is ready for that. Signed-off-by: Klaus Birkelund Jensen --- hw/block/nvme.c | 322 ++++++++++++++++++++++++++++++++---------- hw/block/nvme.h | 49 +++++-- hw/block/trace-events | 3 + 3 files changed, 290 insertions(+), 84 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 02888dbfdbc1..b285119fd29a 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -25,6 +25,8 @@ * Default: 64 * cmb_size_mb= : Size of Controller Memory Buffer in MBs.
* Default: 0 (disabled) + * mdts= : Maximum Data Transfer Size (power of two) + * Default: 7 */ #include "qemu/osdep.h" @@ -319,10 +321,9 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2, NvmeRequest *req) { - QEMUSGList qsg; uint16_t err = NVME_SUCCESS; - err = nvme_map_prp(n, &qsg, prp1, prp2, len, req); + err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); if (err) { return err; } @@ -330,8 +331,8 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, if (req->is_cmb) { QEMUIOVector iov; - qemu_iovec_init(&iov, qsg.nsg); - dma_to_cmb(n, &qsg, &iov); + qemu_iovec_init(&iov, req->qsg.nsg); + dma_to_cmb(n, &req->qsg, &iov); if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { trace_nvme_err_invalid_dma(); @@ -343,17 +344,86 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, goto out; } - if (unlikely(dma_buf_read(ptr, len, &qsg))) { + if (unlikely(dma_buf_read(ptr, len, &req->qsg))) { trace_nvme_err_invalid_dma(); err = NVME_INVALID_FIELD | NVME_DNR; } out: - qemu_sglist_destroy(&qsg); + qemu_sglist_destroy(&req->qsg); return err; } +static void nvme_blk_req_destroy(NvmeBlockBackendRequest *blk_req) +{ + if (blk_req->iov.nalloc) { + qemu_iovec_destroy(&blk_req->iov); + } + + g_free(blk_req); +} + +static void nvme_blk_req_put(NvmeCtrl *n, NvmeBlockBackendRequest *blk_req) +{ + nvme_blk_req_destroy(blk_req); +} + +static NvmeBlockBackendRequest *nvme_blk_req_get(NvmeCtrl *n, NvmeRequest *req, + QEMUSGList *qsg) +{ + NvmeBlockBackendRequest *blk_req = g_malloc0(sizeof(*blk_req)); + + blk_req->req = req; + + if (qsg) { + blk_req->qsg = qsg; + } + + return blk_req; +} + +static uint16_t nvme_blk_setup(NvmeCtrl *n, NvmeNamespace *ns, QEMUSGList *qsg, + NvmeRequest *req) +{ + NvmeBlockBackendRequest *blk_req = nvme_blk_req_get(n, req, qsg); + if (!blk_req) { + NVME_GUEST_ERR(nvme_err_internal_dev_error, "nvme_blk_req_get: %s", + "could not allocate memory"); + return NVME_INTERNAL_DEV_ERROR; + } + + blk_req->slba = req->slba; + blk_req->nlb = req->nlb; + blk_req->blk_offset = req->slba * nvme_ns_lbads_bytes(ns); + + QTAILQ_INSERT_TAIL(&req->blk_req_tailq, blk_req, tailq_entry); + + return NVME_SUCCESS; +} + +static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint16_t err; + + uint32_t len = req->nlb * nvme_ns_lbads_bytes(ns); + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); + if (err) { + return err; + } + + err = nvme_blk_setup(n, ns, &req->qsg, req); + if (err) { + return err; + } + + return NVME_SUCCESS; +} + static void nvme_post_cqes(void *opaque) { NvmeCQueue *cq = opaque; @@ -388,6 +458,10 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) { assert(cq->cqid == req->sq->cqid); + if (req->qsg.nalloc) { + qemu_sglist_destroy(&req->qsg); + } + trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid); QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); @@ -471,130 +545,224 @@ static void nvme_process_aers(void *opaque) static void nvme_rw_cb(void *opaque, int ret) { - NvmeRequest *req = opaque; + NvmeBlockBackendRequest *blk_req = opaque; + NvmeRequest *req = blk_req->req; NvmeSQueue *sq = req->sq; NvmeCtrl *n = sq->ctrl; NvmeCQueue *cq = n->cq[sq->cqid]; + 
QTAILQ_REMOVE(&req->blk_req_tailq, blk_req, tailq_entry); + + trace_nvme_rw_cb(req->cqe.cid, req->cmd.nsid); + if (!ret) { - block_acct_done(blk_get_stats(n->conf.blk), &req->acct); - req->status = NVME_SUCCESS; + block_acct_done(blk_get_stats(n->conf.blk), &blk_req->acct); } else { - block_acct_failed(blk_get_stats(n->conf.blk), &req->acct); - req->status = NVME_INTERNAL_DEV_ERROR; + block_acct_failed(blk_get_stats(n->conf.blk), &blk_req->acct); + NVME_GUEST_ERR(nvme_err_internal_dev_error, "block request failed: %s", + strerror(-ret)); + req->status = NVME_INTERNAL_DEV_ERROR | NVME_DNR; } - if (req->qsg.nalloc) { - qemu_sglist_destroy(&req->qsg); - } - if (req->iov.nalloc) { - qemu_iovec_destroy(&req->iov); + if (QTAILQ_EMPTY(&req->blk_req_tailq)) { + nvme_enqueue_req_completion(cq, req); } - nvme_enqueue_req_completion(cq, req); + nvme_blk_req_put(n, blk_req); } -static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + NvmeBlockBackendRequest *blk_req = nvme_blk_req_get(n, req, NULL); + if (!blk_req) { + NVME_GUEST_ERR(nvme_err_internal_dev_error, "nvme_blk_req_get: %s", + "could not allocate memory"); + return NVME_INTERNAL_DEV_ERROR; + } + + block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, 0, BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + blk_req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, blk_req); + + QTAILQ_INSERT_TAIL(&req->blk_req_tailq, blk_req, tailq_entry); return NVME_NO_COMPLETE; } -static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeRwCmd *rw = (NvmeRwCmd *)cmd; - const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); - const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; + NvmeBlockBackendRequest *blk_req; + const uint8_t lbads = nvme_ns_lbads(req->ns); uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint64_t offset = slba << data_shift; - uint32_t count = nlb << data_shift; + uint64_t offset = slba << lbads; + uint32_t count = nlb << lbads; - if (unlikely(slba + nlb > ns->id_ns.nsze)) { - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + if (unlikely(slba + nlb > req->ns->id_ns.nsze)) { + trace_nvme_err_invalid_lba_range(slba, nlb, req->ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } - block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, - BLOCK_ACCT_WRITE); - req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count, - BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req); + blk_req = nvme_blk_req_get(n, req, NULL); + if (!blk_req) { + NVME_GUEST_ERR(nvme_err_internal_dev_error, "nvme_blk_req_get: %s", + "could not allocate memory"); + return NVME_INTERNAL_DEV_ERROR; + } + + block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, 0, + BLOCK_ACCT_WRITE); + + blk_req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count, + BDRV_REQ_MAY_UNMAP, nvme_rw_cb, blk_req); + + QTAILQ_INSERT_TAIL(&req->blk_req_tailq, blk_req, tailq_entry); + return NVME_NO_COMPLETE; } -static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, - NvmeRequest *req) +static uint16_t nvme_rw_check_req(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeRwCmd *rw = (NvmeRwCmd *)cmd; - uint32_t nlb = le32_to_cpu(rw->nlb) + 1; - uint64_t slba = le64_to_cpu(rw->slba); - 
uint64_t prp1 = le64_to_cpu(rw->prp1); - uint64_t prp2 = le64_to_cpu(rw->prp2); - - uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); - uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; - uint64_t data_size = (uint64_t)nlb << data_shift; - uint64_t data_offset = slba << data_shift; - int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; - enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *) cmd; - trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); + uint16_t ctrl = le16_to_cpu(rw->control); + uint32_t data_size = req->nlb << nvme_ns_lbads(ns); - if (unlikely((slba + nlb) > ns->id_ns.nsze)) { - block_acct_invalid(blk_get_stats(n->conf.blk), acct); - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + if (unlikely((req->slba + req->nlb) > ns->id_ns.nsze)) { + block_acct_invalid(blk_get_stats(n->conf.blk), req->is_write ? + BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + trace_nvme_err_invalid_lba_range(req->slba, req->nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } - if (nvme_map_prp(n, &req->qsg, prp1, prp2, data_size, req)) { - block_acct_invalid(blk_get_stats(n->conf.blk), acct); + if (n->params.mdts && data_size > n->page_size * (1 << n->params.mdts)) { return NVME_INVALID_FIELD | NVME_DNR; } - dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - if (!req->is_cmb) { - req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req); + if ((ctrl & NVME_RW_PRINFO_PRACT) && !(ns->id_ns.dps & DPS_TYPE_MASK)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static void nvme_blk_submit_dma(NvmeCtrl *n, NvmeBlockBackendRequest *blk_req, + BlockCompletionFunc *cb) +{ + NvmeRequest *req = blk_req->req; + + if (req->is_write) { + dma_acct_start(n->conf.blk, &blk_req->acct, blk_req->qsg, + BLOCK_ACCT_WRITE); + + blk_req->aiocb = dma_blk_write(n->conf.blk, blk_req->qsg, + blk_req->blk_offset, BDRV_SECTOR_SIZE, cb, blk_req); + } else { + dma_acct_start(n->conf.blk, &blk_req->acct, blk_req->qsg, + BLOCK_ACCT_READ); + + blk_req->aiocb = dma_blk_read(n->conf.blk, blk_req->qsg, + blk_req->blk_offset, BDRV_SECTOR_SIZE, cb, blk_req); + } +} + +static void nvme_blk_submit_cmb(NvmeCtrl *n, NvmeBlockBackendRequest *blk_req, + BlockCompletionFunc *cb) +{ + NvmeRequest *req = blk_req->req; + + qemu_iovec_init(&blk_req->iov, blk_req->qsg->nsg); + dma_to_cmb(n, blk_req->qsg, &blk_req->iov); + + if (req->is_write) { + block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, + blk_req->iov.size, BLOCK_ACCT_WRITE); + + blk_req->aiocb = blk_aio_pwritev(n->conf.blk, blk_req->blk_offset, + &blk_req->iov, 0, cb, blk_req); } else { - qemu_iovec_init(&req->iov, req->qsg.nsg); - dma_to_cmb(n, &req->qsg, &req->iov); - req->aiocb = is_write ? 
- blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, - req) : - blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, - req); + block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, + blk_req->iov.size, BLOCK_ACCT_READ); + + blk_req->aiocb = blk_aio_preadv(n->conf.blk, blk_req->blk_offset, + &blk_req->iov, 0, cb, blk_req); + } +} + +static uint16_t nvme_blk_submit_io(NvmeCtrl *n, NvmeRequest *req, + BlockCompletionFunc *cb) +{ + NvmeBlockBackendRequest *blk_req; + + if (QTAILQ_EMPTY(&req->blk_req_tailq)) { + return NVME_SUCCESS; + } + + QTAILQ_FOREACH(blk_req, &req->blk_req_tailq, tailq_entry) { + if (req->is_cmb) { + nvme_blk_submit_cmb(n, blk_req, cb); + } else { + nvme_blk_submit_dma(n, blk_req, cb); + } } return NVME_NO_COMPLETE; } +static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)cmd; + uint32_t nlb = le32_to_cpu(rw->nlb) + 1; + uint64_t slba = le64_to_cpu(rw->slba); + + req->is_write = nvme_rw_is_write(req); + + trace_nvme_rw(req->is_write ? "write" : "read", nlb, + nlb << nvme_ns_lbads(req->ns), slba); + + int err = nvme_blk_map(n, cmd, req); + if (err) { + return err; + } + + return nvme_blk_submit_io(n, req, nvme_rw_cb); +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - NvmeNamespace *ns; + NvmeRwCmd *rw; + int err; + uint32_t nsid = le32_to_cpu(cmd->nsid); + trace_nvme_io_cmd(req->cqe.cid, nsid, cmd->opcode); + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { trace_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } - ns = &n->namespace; + req->ns = &n->namespace; switch (cmd->opcode) { case NVME_CMD_FLUSH: - return nvme_flush(n, ns, cmd, req); + return nvme_flush(n, cmd, req); case NVME_CMD_WRITE_ZEROS: - return nvme_write_zeros(n, ns, cmd, req); + return nvme_write_zeros(n, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: - return nvme_rw(n, ns, cmd, req); + rw = (NvmeRwCmd *)cmd; + + req->nlb = le16_to_cpu(rw->nlb) + 1; + req->slba = le64_to_cpu(rw->slba); + + err = nvme_rw_check_req(n, cmd, req); + if (err) { + return err; + } + + return nvme_rw(n, cmd, req); default: trace_nvme_err_invalid_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; @@ -619,6 +787,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) NvmeRequest *req, *next; NvmeSQueue *sq; NvmeCQueue *cq; + NvmeBlockBackendRequest *blk_req; uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_sqid(n, qid))) { @@ -631,8 +800,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { req = QTAILQ_FIRST(&sq->out_req_list); - assert(req->aiocb); - blk_aio_cancel(req->aiocb); + while (!QTAILQ_EMPTY(&req->blk_req_tailq)) { + blk_req = QTAILQ_FIRST(&req->blk_req_tailq); + assert(blk_req->aiocb); + blk_aio_cancel(blk_req->aiocb); + } } if (!nvme_check_cqid(n, sq->cqid)) { cq = n->cq[sq->cqid]; @@ -669,6 +841,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, QTAILQ_INIT(&sq->out_req_list); for (i = 0; i < sq->size; i++) { sq->io_req[i].sq = sq; + QTAILQ_INIT(&(sq->io_req[i].blk_req_tailq)); QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); @@ -1947,6 +2120,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; + id->mdts = params->mdts; id->ver = cpu_to_le32(0x00010300); id->oacs = cpu_to_le16(0); id->acl = 3; diff --git 
a/hw/block/nvme.h b/hw/block/nvme.h index 7ec4cf7c3c13..832094f77845 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -6,11 +6,13 @@ #define DEFINE_NVME_PROPERTIES(_state, _props) \ DEFINE_PROP_STRING("serial", _state, _props.serial), \ DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \ - DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64) + DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \ + DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7) typedef struct NvmeParams { char *serial; uint32_t num_queues; + uint8_t mdts; uint32_t cmb_size_mb; } NvmeParams; @@ -19,16 +21,38 @@ typedef struct NvmeAsyncEvent { NvmeAerResult result; } NvmeAsyncEvent; +typedef struct NvmeBlockBackendRequest { + uint64_t slba; + uint16_t nlb; + uint64_t blk_offset; + + struct NvmeRequest *req; + + BlockAIOCB *aiocb; + BlockAcctCookie acct; + + QEMUSGList *qsg; + QEMUIOVector iov; + + QTAILQ_ENTRY(NvmeBlockBackendRequest) tailq_entry; + QSLIST_ENTRY(NvmeBlockBackendRequest) slist_entry; +} NvmeBlockBackendRequest; + typedef struct NvmeRequest { - struct NvmeSQueue *sq; - BlockAIOCB *aiocb; - uint16_t status; - bool is_cmb; - NvmeCqe cqe; - BlockAcctCookie acct; - QEMUSGList qsg; - QEMUIOVector iov; - NvmeCmd cmd; + struct NvmeSQueue *sq; + struct NvmeNamespace *ns; + NvmeCqe cqe; + NvmeCmd cmd; + + uint64_t slba; + uint16_t nlb; + uint16_t status; + bool is_cmb; + bool is_write; + + QEMUSGList qsg; + + QTAILQ_HEAD(, NvmeBlockBackendRequest) blk_req_tailq; QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; @@ -113,6 +137,11 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +static inline bool nvme_rw_is_write(NvmeRequest *req) +{ + return req->cmd.opcode == NVME_CMD_WRITE; +} + static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) { NvmeIdNs *id = &ns->id_ns; diff --git a/hw/block/trace-events b/hw/block/trace-events index 9ea51a464c13..b324751ad990 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -34,7 +34,9 @@ nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" nvme_map_prp(uint8_t cmd_opcode, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cmd_opcode=0x%"PRIx8", trans_len=%"PRIu64", len=%"PRIu32", prp1=0x%"PRIx64", prp2=0x%"PRIx64", num_prps=%d" +nvme_io_cmd(uint16_t cid, uint32_t nsid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" opc %"PRIu8"" nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" +nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" @@ -115,6 +117,7 @@ nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_star nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" nvme_err_startfail(void) "setting 
controller enable bit failed" +nvme_err_internal_dev_error(const char *reason) "%s" # Traces for undefined behavior nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""

From patchwork Fri Jul 5 07:23:32 2019
X-Patchwork-Submitter: Klaus Jensen
X-Patchwork-Id: 11032123
From: Klaus Birkelund Jensen
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:32 +0200
Message-Id: <20190705072333.17171-16-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 15/16] nvme: support scatter gather lists

For now, support the Data Block, Segment and Last Segment descriptor types. See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)").
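[Editorial note: as a reading aid for the descriptor walk implemented in this patch, the following stand-alone sketch shows how a chained SGL is traversed in principle: a Segment descriptor's last entry points at the next segment, a Last Segment contains only Data Block entries, and mapping stops once the command's transfer length is consumed. This is an illustration only, not code from the patch: the struct mirrors the 16-byte NvmeSglDescriptor layout added below, while walk_sgl, fetch_segment and the toy guest memory are hypothetical stand-ins for the patch's nvme_map_sgl/nvme_addr_read machinery, and the CMB and excess-length rules the real code enforces are ignored.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors the 16-byte descriptor layout added to include/block/nvme.h. */
typedef struct SglDesc {
    uint64_t addr;
    uint32_t len;
    uint8_t  rsvd[3];
    uint8_t  type;                      /* descriptor type in the high nibble */
} SglDesc;

enum { DATA_BLOCK = 0x0, SEGMENT = 0x2, LAST_SEGMENT = 0x3 };

#define SGL_TYPE(t) ((t) >> 4)
#define MAX_NSGLD   16                  /* toy limit; the patch reads 256 at a time */

/* Hypothetical stand-in for nvme_addr_read(): copy descriptors from a fake
 * "guest memory" region into a local buffer. */
static SglDesc guest_mem[MAX_NSGLD];

static int fetch_segment(uint64_t addr, SglDesc *out, unsigned nsgld)
{
    if (addr != 0x1000 || nsgld > MAX_NSGLD) {
        return -1;
    }
    memcpy(out, guest_mem, nsgld * sizeof(SglDesc));
    return 0;
}

/* Walk the SGL rooted at 'desc' until 'len' bytes have been described. */
static int walk_sgl(SglDesc desc, uint32_t len)
{
    SglDesc seg[MAX_NSGLD];

    for (;;) {
        /* a single Data Block at any level maps the rest of the transfer */
        if (SGL_TYPE(desc.type) == DATA_BLOCK) {
            uint32_t n = desc.len < len ? desc.len : len;
            printf("map addr=0x%" PRIx64 " len=%" PRIu32 "\n", desc.addr, n);
            return (len -= n) ? -1 : 0; /* residual means the SGL was too short */
        }

        if (SGL_TYPE(desc.type) != SEGMENT &&
            SGL_TYPE(desc.type) != LAST_SEGMENT) {
            return -1;                  /* invalid descriptor type */
        }

        int last = SGL_TYPE(desc.type) == LAST_SEGMENT;
        unsigned nsgld = desc.len / sizeof(SglDesc);

        if (nsgld == 0 || fetch_segment(desc.addr, seg, nsgld)) {
            return -1;
        }

        /* in a non-last Segment, the final entry chains to the next segment */
        unsigned ndata = last ? nsgld : nsgld - 1;
        for (unsigned i = 0; i < ndata; i++) {
            if (SGL_TYPE(seg[i].type) != DATA_BLOCK) {
                return -1;
            }
            uint32_t n = seg[i].len < len ? seg[i].len : len;
            printf("map addr=0x%" PRIx64 " len=%" PRIu32 "\n", seg[i].addr, n);
            if ((len -= n) == 0) {
                return 0;               /* transfer fully described */
            }
        }

        if (last) {
            return -1;                  /* ran out of descriptors */
        }
        desc = seg[nsgld - 1];          /* follow the chain */
    }
}

int main(void)
{
    /* a Last Segment at "guest address" 0x1000 holding two 4 KiB data blocks */
    guest_mem[0] = (SglDesc){ .addr = 0x20000, .len = 4096, .type = DATA_BLOCK << 4 };
    guest_mem[1] = (SglDesc){ .addr = 0x30000, .len = 4096, .type = DATA_BLOCK << 4 };

    SglDesc root = { .addr = 0x1000, .len = 2 * sizeof(SglDesc),
                     .type = LAST_SEGMENT << 4 };

    return walk_sgl(root, 8192) ? 1 : 0;
}

Note that re-entering the top of the loop after following the chain also covers the case, called out in a comment in the patch, where a regular Segment ends directly in a Data Block.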
Signed-off-by: Klaus Birkelund Jensen --- block/nvme.c | 18 +- hw/block/nvme.c | 390 +++++++++++++++++++++++++++++++++++------- hw/block/nvme.h | 6 + hw/block/trace-events | 3 + include/block/nvme.h | 64 ++++++- 5 files changed, 410 insertions(+), 71 deletions(-) diff --git a/block/nvme.c b/block/nvme.c index 73ed5fa75f2e..907a610633f2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -438,7 +438,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) error_setg(errp, "Cannot map buffer for DMA"); goto out; } - cmd.prp1 = cpu_to_le64(iova); + cmd.dptr.prp.prp1 = cpu_to_le64(iova); if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { error_setg(errp, "Failed to identify controller"); @@ -512,7 +512,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) } cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_CQ, - .prp1 = cpu_to_le64(q->cq.iova), + .dptr.prp.prp1 = cpu_to_le64(q->cq.iova), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw11 = cpu_to_le32(0x3), }; @@ -523,7 +523,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) } cmd = (NvmeCmd) { .opcode = NVME_ADM_CMD_CREATE_SQ, - .prp1 = cpu_to_le64(q->sq.iova), + .dptr.prp.prp1 = cpu_to_le64(q->sq.iova), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw11 = cpu_to_le32(0x1 | (n << 16)), }; @@ -858,16 +858,16 @@ try_map: case 0: abort(); case 1: - cmd->prp1 = pagelist[0]; - cmd->prp2 = 0; + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = 0; break; case 2: - cmd->prp1 = pagelist[0]; - cmd->prp2 = pagelist[1]; + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = pagelist[1]; break; default: - cmd->prp1 = pagelist[0]; - cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t)); + cmd->dptr.prp.prp1 = pagelist[0]; + cmd->dptr.prp.prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t)); break; } trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index b285119fd29a..6bf62952dd13 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -273,6 +273,198 @@ unmap: return status; } +static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg, + NvmeSglDescriptor *segment, uint64_t nsgld, uint32_t *len, + NvmeRequest *req) +{ + dma_addr_t addr, trans_len; + + for (int i = 0; i < nsgld; i++) { + if (NVME_SGL_TYPE(segment[i].type) != SGL_DESCR_TYPE_DATA_BLOCK) { + trace_nvme_err_invalid_sgl_descriptor(req->cqe.cid, + NVME_SGL_TYPE(segment[i].type)); + return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR; + } + + if (*len == 0) { + if (!NVME_CTRL_SGLS_EXCESS_LENGTH(n->id_ctrl.sgls)) { + trace_nvme_err_invalid_sgl_excess_length(req->cqe.cid); + return NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR; + } + + break; + } + + addr = le64_to_cpu(segment[i].addr); + trans_len = MIN(*len, le64_to_cpu(segment[i].len)); + + if (nvme_addr_is_cmb(n, addr)) { + /* + * All data and metadata, if any, associated with a particular + * command shall be located in either the CMB or host memory. Thus, + * if an address is found to be in the CMB and we have already + * mapped data that is in host memory, the use is invalid. + */ + if (!req->is_cmb && qsg->size) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + req->is_cmb = true; + } else { + /* + * Similarly, if the address does not reference the CMB, but we + * have already established that the request has data or metadata + * in the CMB, the use is invalid.
+ */ + if (req->is_cmb) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + } + + qemu_sglist_add(qsg, addr, trans_len); + + *len -= trans_len; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, + NvmeSglDescriptor sgl, uint32_t len, NvmeRequest *req) +{ + const int MAX_NSGLD = 256; + + NvmeSglDescriptor segment[MAX_NSGLD]; + uint64_t nsgld; + uint16_t status; + bool sgl_in_cmb = false; + hwaddr addr = le64_to_cpu(sgl.addr); + + trace_nvme_map_sgl(req->cqe.cid, NVME_SGL_TYPE(sgl.type), req->nlb, len); + + pci_dma_sglist_init(qsg, &n->parent_obj, 1); + + /* + * If the entire transfer can be described with a single data block it can + * be mapped directly. + */ + if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, qsg, &sgl, 1, &len, req); + if (status) { + goto unmap; + } + + goto out; + } + + /* + * If the segment is located in the CMB, the submission queue of the + * request must also reside there. + */ + if (nvme_addr_is_cmb(n, addr)) { + if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + sgl_in_cmb = true; + } + + while (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_SEGMENT) { + bool addr_is_cmb; + + nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor); + + /* read the segment in chunks of 256 descriptors (4k) */ + while (nsgld > MAX_NSGLD) { + nvme_addr_read(n, addr, segment, sizeof(segment)); + + status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); + if (status) { + goto unmap; + } + + nsgld -= MAX_NSGLD; + addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); + } + + nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + + sgl = segment[nsgld - 1]; + addr = le64_to_cpu(sgl.addr); + + /* an SGL is allowed to end with a Data Block in a regular Segment */ + if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req); + if (status) { + goto unmap; + } + + goto out; + } + + /* do not map last descriptor */ + status = nvme_map_sgl_data(n, qsg, segment, nsgld - 1, &len, req); + if (status) { + goto unmap; + } + + /* + * If the next segment is in the CMB, make sure that the sgl was + * already located there. + */ + addr_is_cmb = nvme_addr_is_cmb(n, addr); + if ((sgl_in_cmb && !addr_is_cmb) || (!sgl_in_cmb && addr_is_cmb)) { + status = NVME_INVALID_USE_OF_CMB | NVME_DNR; + goto unmap; + } + } + + /* + * If the segment did not end with a Data Block or a Segment descriptor, it + * must be a Last Segment descriptor. 
+ */ + if (NVME_SGL_TYPE(sgl.type) != SGL_DESCR_TYPE_LAST_SEGMENT) { + trace_nvme_err_invalid_sgl_descriptor(req->cqe.cid, + NVME_SGL_TYPE(sgl.type)); + return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR; + } + + nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor); + + while (nsgld > MAX_NSGLD) { + nvme_addr_read(n, addr, segment, sizeof(segment)); + + status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req); + if (status) { + goto unmap; + } + + nsgld -= MAX_NSGLD; + addr += MAX_NSGLD * sizeof(NvmeSglDescriptor); + } + + nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor)); + + status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req); + if (status) { + goto unmap; + } + +out: + /* if there is any residual left in len, the SGL was too short */ + if (len) { + status = NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR; + goto unmap; + } + + return NVME_SUCCESS; + +unmap: + qemu_sglist_destroy(qsg); + + return status; +} + static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov) { for (int i = 0; i < qsg->nsg; i++) { @@ -318,6 +510,56 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return err; } +static uint16_t nvme_dma_write_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeSglDescriptor sgl, NvmeRequest *req) +{ + QEMUSGList qsg; + uint16_t err = NVME_SUCCESS; + + err = nvme_map_sgl(n, &qsg, sgl, len, req); + if (err) { + return err; + } + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_iovec_destroy(&iov); + + return err; + } + + if (unlikely(dma_buf_write(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_sglist_destroy(&qsg); + + return err; +} + +static uint16_t nvme_dma_write(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeCmd *cmd, NvmeRequest *req) +{ + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + return nvme_dma_write_sgl(n, ptr, len, cmd->dptr.sgl, req); + } + + uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + + return nvme_dma_write_prp(n, ptr, len, prp1, prp2, req); +} + static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2, NvmeRequest *req) { @@ -355,6 +597,57 @@ out: return err; } +static uint16_t nvme_dma_read_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeSglDescriptor sgl, NvmeCmd *cmd, NvmeRequest *req) +{ + QEMUSGList qsg; + uint16_t err = NVME_SUCCESS; + + err = nvme_map_sgl(n, &qsg, sgl, len, req); + if (err) { + return err; + } + + if (req->is_cmb) { + QEMUIOVector iov; + + qemu_iovec_init(&iov, qsg.nsg); + dma_to_cmb(n, &qsg, &iov); + + if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_iovec_destroy(&iov); + + goto out; + } + + if (unlikely(dma_buf_read(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); + err = NVME_INVALID_FIELD | NVME_DNR; + } + +out: + qemu_sglist_destroy(&qsg); + + return err; +} + +static uint16_t nvme_dma_read(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeCmd *cmd, NvmeRequest *req) +{ + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + return nvme_dma_read_sgl(n, ptr, len, cmd->dptr.sgl, cmd, req); + } + + uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + + return 
nvme_dma_read_prp(n, ptr, len, prp1, prp2, req); +} + static void nvme_blk_req_destroy(NvmeBlockBackendRequest *blk_req) { if (blk_req->iov.nalloc) { @@ -408,20 +701,25 @@ static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint16_t err; uint32_t len = req->nlb * nvme_ns_lbads_bytes(ns); - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); - if (err) { - return err; + if (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + err = nvme_map_sgl(n, &req->qsg, cmd->dptr.sgl, len, req); + if (err) { + return err; + } + } else { + uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1); + uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2); + + err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req); + if (err) { + return err; + } } err = nvme_blk_setup(n, ns, &req->qsg, req); - if (err) { - return err; - } - return NVME_SUCCESS; + return err; } static void nvme_post_cqes(void *opaque) @@ -984,25 +1282,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c, - NvmeRequest *req) +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ctrl(); - return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *) &n->id_ctrl, sizeof(n->id_ctrl), cmd, + req); } -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, - NvmeRequest *req) +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeNamespace *ns; - uint32_t nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t nsid = le32_to_cpu(cmd->nsid); trace_nvme_identify_ns(nsid); @@ -1013,17 +1304,15 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c, ns = &n->namespace; - return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd, + req); } -static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { static const int data_len = 4 * KiB; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t min_nsid = le32_to_cpu(cmd->nsid); uint32_t *list; uint16_t ret; int i, j = 0; @@ -1040,12 +1329,12 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c, break; } } - ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2, req); + ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req); g_free(list); return ret; } -static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c, +static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { static const int data_len = 4 * KiB; @@ -1063,9 +1352,7 @@ static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c, uint32_t nid; }; - uint32_t nsid = le32_to_cpu(c->nsid); - uint64_t prp1 = le64_to_cpu(c->prp1); - uint64_t prp2 = le64_to_cpu(c->prp2); + uint32_t nsid = le32_to_cpu(cmd->nsid); struct ns_descr *list; uint16_t ret; @@ -1082,7 +1369,7 @@ static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c, list->nidl = 0x10; list->nid = cpu_to_be32(nsid); - ret = 
nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2, req); + ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req); g_free(list); return ret; } @@ -1093,11 +1380,11 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (le32_to_cpu(c->cns)) { case 0x00: - return nvme_identify_ns(n, c, req); + return nvme_identify_ns(n, cmd, req); case 0x01: - return nvme_identify_ctrl(n, c, req); + return nvme_identify_ctrl(n, cmd, req); case 0x02: - return nvme_identify_ns_list(n, c, req); + return nvme_identify_ns_list(n, cmd, req); case 0x03: return nvme_identify_ns_descriptor_list(n, cmd, req); default: @@ -1149,13 +1436,10 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - uint64_t timestamp = nvme_get_timestamp(n); - return nvme_dma_read_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), - prp1, prp2, req); + return nvme_dma_read(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, + req); } static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) @@ -1220,11 +1504,9 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, { uint16_t ret; uint64_t timestamp; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp, - sizeof(timestamp), prp1, prp2, req); + ret = nvme_dma_write(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd, + req); if (ret != NVME_SUCCESS) { return ret; } @@ -1301,8 +1583,6 @@ static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { uint32_t trans_len; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -1314,16 +1594,12 @@ static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_ERROR); } - return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) n->elpes + off, trans_len, cmd, req); } static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); - uint32_t trans_len; time_t current_ms; NvmeSmartLog smart; @@ -1357,16 +1633,13 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, nvme_clear_events(n, NVME_AER_TYPE_SMART); } - return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) &smart + off, trans_len, cmd, req); } static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, uint64_t off, NvmeRequest *req) { uint32_t trans_len; - uint64_t prp1 = le64_to_cpu(cmd->prp1); - uint64_t prp2 = le64_to_cpu(cmd->prp2); NvmeFwSlotInfoLog fw_log; if (off > sizeof(fw_log)) { @@ -1377,8 +1650,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, trans_len = MIN(sizeof(fw_log) - off, buf_len); - return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, - prp2, req); + return nvme_dma_read(n, (uint8_t *) &fw_log + off, trans_len, cmd, req); } @@ -2137,6 +2409,8 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->vwc = 1; } + id->sgls = cpu_to_le32(0x1); + strcpy((char *)
id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:"); qemu_uuid_unparse(&qemu_uuid, (char *) id->subnqn + strlen((char *) id->subnqn)); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 832094f77845..1d52b183d263 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -142,6 +142,12 @@ static inline bool nvme_rw_is_write(NvmeRequest *req) return req->cmd.opcode == NVME_CMD_WRITE; } +static inline bool nvme_is_error(uint16_t status, uint16_t err) +{ + /* strip DNR and MORE */ + return (status & 0xfff) == err; +} + static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) { NvmeIdNs *id = &ns->id_ns; diff --git a/hw/block/trace-events b/hw/block/trace-events index b324751ad990..b239e92294e4 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -34,6 +34,7 @@ nvme_irq_pin(void) "pulsing IRQ pin" nvme_irq_masked(void) "IRQ is masked" nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" nvme_map_prp(uint8_t cmd_opcode, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cmd_opcode=0x%"PRIx8", trans_len=%"PRIu64", len=%"PRIu32", prp1=0x%"PRIx64", prp2=0x%"PRIx64", num_prps=%d" +nvme_map_sgl(uint16_t cid, uint8_t typ, uint16_t nlb, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" nlb %"PRIu16" len %"PRIu64"" nvme_io_cmd(uint16_t cid, uint32_t nsid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" opc %"PRIu8"" nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" @@ -76,6 +77,8 @@ nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16"" +nvme_err_invalid_sgl_descriptor(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16"" nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index a6ef8d8ff25a..f79c71ba3f8c 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -205,15 +205,53 @@ enum NvmeCmbszMask { #define NVME_CMBSZ_GETSIZE(cmbsz) \ (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) +enum NvmeSglDescriptorType { + SGL_DESCR_TYPE_DATA_BLOCK = 0x0, + SGL_DESCR_TYPE_BIT_BUCKET = 0x1, + SGL_DESCR_TYPE_SEGMENT = 0x2, + SGL_DESCR_TYPE_LAST_SEGMENT = 0x3, + SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4, + + SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf, +}; + +enum NvmeSglDescriptorSubtype { + SGL_DESCR_SUBTYPE_ADDRESS = 0x0, + SGL_DESCR_SUBTYPE_OFFSET = 0x1, +}; + +typedef struct NvmeSglDescriptor { + uint64_t addr; + uint32_t len; + uint8_t rsvd[3]; + uint8_t type; +} NvmeSglDescriptor; + +#define NVME_SGL_TYPE(type) (type >> 4) + +typedef union NvmeCmdDptr { + struct { + uint64_t prp1; + uint64_t prp2; + } prp; + + NvmeSglDescriptor sgl; +} NvmeCmdDptr; + +enum NvmePsdt { + PSDT_PRP = 0x0, + PSDT_SGL_MPTR_CONTIGUOUS = 0x1, + PSDT_SGL_MPTR_SGL = 0x2, +}; + typedef struct NvmeCmd { uint8_t opcode; - uint8_t fuse; + uint8_t flags; uint16_t cid; uint32_t nsid; - uint64_t res1; + uint64_t rsvd2; uint64_t mptr; - uint64_t prp1; - uint64_t prp2; + NvmeCmdDptr dptr; uint32_t cdw10; uint32_t cdw11; uint32_t cdw12; @@ -222,6 +260,9 @@ 
typedef struct NvmeCmd { uint32_t cdw15; } NvmeCmd; +#define NVME_CMD_FLAGS_FUSE(flags) (flags & 0x3) +#define NVME_CMD_FLAGS_PSDT(flags) ((flags >> 6) & 0x3) + enum NvmeAdminCommands { NVME_ADM_CMD_DELETE_SQ = 0x00, NVME_ADM_CMD_CREATE_SQ = 0x01, @@ -427,6 +468,11 @@ enum NvmeStatusCodes { NVME_CMD_ABORT_MISSING_FUSE = 0x000a, NVME_INVALID_NSID = 0x000b, NVME_CMD_SEQ_ERROR = 0x000c, + NVME_INVALID_SGL_SEG_DESCRIPTOR = 0x000d, + NVME_INVALID_NUM_SGL_DESCRIPTORS = 0x000e, + NVME_DATA_SGL_LENGTH_INVALID = 0x000f, + NVME_METADATA_SGL_LENGTH_INVALID = 0x0010, + NVME_SGL_DESCRIPTOR_TYPE_INVALID = 0x0011, NVME_INVALID_USE_OF_CMB = 0x0012, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, @@ -623,6 +669,16 @@ enum NvmeIdCtrlOncs { #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) #define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf) +#define NVME_CTRL_SGLS_SUPPORTED(sgls) ((sgls) & 0x3) +#define NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT(sgls) ((sgls) & (0x1 << 0)) +#define NVME_CTRL_SGLS_SUPPORTED_DWORD_ALIGNMENT(sgls) ((sgls) & (0x1 << 1)) +#define NVME_CTRL_SGLS_KEYED(sgls) ((sgls) & (0x1 << 2)) +#define NVME_CTRL_SGLS_BITBUCKET(sgls) ((sgls) & (0x1 << 16)) +#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS(sgls) ((sgls) & (0x1 << 17)) +#define NVME_CTRL_SGLS_EXCESS_LENGTH(sgls) ((sgls) & (0x1 << 18)) +#define NVME_CTRL_SGLS_MPTR_SGL(sgls) ((sgls) & (0x1 << 19)) +#define NVME_CTRL_SGLS_ADDR_OFFSET(sgls) ((sgls) & (0x1 << 20)) + typedef struct NvmeFeatureVal { uint32_t arbitration; uint32_t power_mgmt;

From patchwork Fri Jul 5 07:23:33 2019
X-Patchwork-Submitter: Klaus Jensen
X-Patchwork-Id: 11032135
From: Klaus Birkelund Jensen
To: qemu-block@nongnu.org
Date: Fri, 5 Jul 2019 09:23:33 +0200
Message-Id: <20190705072333.17171-17-klaus@birkelund.eu>
In-Reply-To: <20190705072333.17171-1-klaus@birkelund.eu>
Subject: [Qemu-devel] [PATCH 16/16] nvme: support multiple namespaces

This adds support for multiple namespaces by introducing a new 'nvme-ns' device model. The nvme device creates a bus named from the device name ('id'). The nvme-ns devices then connect to this and register themselves with the nvme device. This changes how an nvme device is created. Example with two namespaces: -drive file=nvme0n1.img,if=none,id=disk1 -drive file=nvme0n2.img,if=none,id=disk2 -device nvme,serial=deadbeef,id=nvme0 -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 A maximum of 256 namespaces can be configured. Signed-off-by: Klaus Birkelund Jensen --- hw/block/Makefile.objs | 2 +- hw/block/nvme-ns.c | 139 +++++++++++++++++++++++++++++++++ hw/block/nvme-ns.h | 35 +++++++++ hw/block/nvme.c | 169 ++++++++++++++++------------------- hw/block/nvme.h | 29 ++++--- hw/block/trace-events | 1 + 6 files changed, 255 insertions(+), 120 deletions(-) create mode 100644 hw/block/nvme-ns.c create mode 100644 hw/block/nvme-ns.h diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index f5f643f0cc06..d44a2f4b780d 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o common-obj-$(CONFIG_XEN) += xen-block.o common-obj-$(CONFIG_ECC) += ecc.o common-obj-$(CONFIG_ONENAND) += onenand.o -common-obj-$(CONFIG_NVME_PCI) += nvme.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o obj-$(CONFIG_SH4) += tc58128.o diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c new file mode 100644 index 000000000000..11b594467991 --- /dev/null +++ b/hw/block/nvme-ns.c @@ -0,0 +1,139 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "hw/block/block.h" +#include "hw/pci/msix.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" + +#include "hw/qdev-core.h" + +#include "nvme.h" +#include "nvme-ns.h" + +static uint64_t nvme_ns_calc_blks(NvmeNamespace *ns) +{ + return ns->size / nvme_ns_lbads_bytes(ns); +} + +static void nvme_ns_init_identify(NvmeIdNs *id_ns) +{ + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; +} + +static int nvme_ns_init(NvmeNamespace *ns) +{ + uint64_t ns_blks; + NvmeIdNs *id_ns = &ns->id_ns; + + nvme_ns_init_identify(id_ns); + + ns_blks = nvme_ns_calc_blks(ns); + id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns_blks); + + return 0; +} + +static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeIdCtrl
*id, Error **errp) +{ + blkconf_blocksizes(&ns->conf); + + if (!blkconf_apply_backend_options(&ns->conf, + blk_is_read_only(ns->conf.blk), false, errp)) { + return 1; + } + + ns->size = blk_getlength(ns->conf.blk); + if (ns->size < 0) { + error_setg_errno(errp, -ns->size, "blk_getlength"); + return 1; + } + + if (!blk_enable_write_cache(ns->conf.blk)) { + id->vwc = 0; + } + + return 0; +} + +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) +{ + if (!ns->conf.blk) { + error_setg(errp, "nvme-ns: block backend not configured"); + return 1; + } + + return 0; +} + + +static void nvme_ns_realize(DeviceState *dev, Error **errp) +{ + NvmeNamespace *ns = NVME_NS(dev); + BusState *s = qdev_get_parent_bus(dev); + NvmeCtrl *n = NVME(s->parent); + Error *local_err = NULL; + + if (nvme_ns_check_constraints(ns, &local_err)) { + error_propagate_prepend(errp, local_err, + "nvme_ns_check_constraints: "); + return; + } + + if (nvme_ns_init_blk(ns, &n->id_ctrl, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_ns_init_blk: "); + return; + } + + nvme_ns_init(ns); + if (nvme_register_namespace(n, ns, &local_err)) { + error_propagate_prepend(errp, local_err, "nvme_register_namespace: "); + return; + } +} + +static Property nvme_ns_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeNamespace, conf), + DEFINE_NVME_NS_PROPERTIES(NvmeNamespace, params), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_ns_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->bus_type = TYPE_NVME_BUS; + dc->realize = nvme_ns_realize; + dc->props = nvme_ns_props; + dc->desc = "virtual nvme namespace"; +} + +static void nvme_ns_instance_init(Object *obj) +{ + NvmeNamespace *ns = NVME_NS(obj); + char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); + + device_add_bootindex_property(obj, &ns->conf.bootindex, "bootindex", + bootindex, DEVICE(obj), &error_abort); + + g_free(bootindex); +} + +static const TypeInfo nvme_ns_info = { + .name = TYPE_NVME_NS, + .parent = TYPE_DEVICE, + .class_init = nvme_ns_class_init, + .instance_size = sizeof(NvmeNamespace), + .instance_init = nvme_ns_instance_init, +}; + +static void nvme_ns_register_types(void) +{ + type_register_static(&nvme_ns_info); +} + +type_init(nvme_ns_register_types) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h new file mode 100644 index 000000000000..f563bb14eceb --- /dev/null +++ b/hw/block/nvme-ns.h @@ -0,0 +1,35 @@ +#ifndef NVME_NS_H +#define NVME_NS_H + +#define TYPE_NVME_NS "nvme-ns" +#define NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +#define DEFINE_NVME_NS_PROPERTIES(_state, _props) \ + DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0) + +typedef struct NvmeNamespaceParams { + uint32_t nsid; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockConf conf; + int64_t size; + + NvmeIdNs id_ns; + NvmeNamespaceParams params; +} NvmeNamespace; + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + NvmeIdNs *id = &ns->id_ns; + return id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].ds; +} + +static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns) +{ + return 1 << nvme_ns_lbads(ns); +} + +#endif /* NVME_NS_H */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6bf62952dd13..6448798132d6 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -17,7 +17,8 @@ /** * Usage: add options: * -drive file=,if=none,id= - * -device nvme,drive=,serial=,id= + * -device 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6bf62952dd13..6448798132d6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -17,7 +17,8 @@
 /**
  * Usage: add options:
  *      -drive file=<file>,if=none,id=<drive_id>
- *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
+ *      -device nvme,serial=<serial>,id=nvme0
+ *      -device nvme-ns,drive=<drive_id>,bus=nvme0,nsid=1
  *
  * Advanced optional options:
  *
@@ -41,6 +42,7 @@
 
 #include "trace.h"
 #include "nvme.h"
+#include "nvme-ns.h"
 
 #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
 #define NVME_TEMPERATURE 0x143
@@ -848,15 +850,16 @@ static void nvme_rw_cb(void *opaque, int ret)
     NvmeSQueue *sq = req->sq;
     NvmeCtrl *n = sq->ctrl;
     NvmeCQueue *cq = n->cq[sq->cqid];
+    NvmeNamespace *ns = req->ns;
 
     QTAILQ_REMOVE(&req->blk_req_tailq, blk_req, tailq_entry);
 
-    trace_nvme_rw_cb(req->cqe.cid, req->cmd.nsid);
+    trace_nvme_rw_cb(req->cqe.cid, ns->params.nsid);
 
     if (!ret) {
-        block_acct_done(blk_get_stats(n->conf.blk), &blk_req->acct);
+        block_acct_done(blk_get_stats(ns->conf.blk), &blk_req->acct);
     } else {
-        block_acct_failed(blk_get_stats(n->conf.blk), &blk_req->acct);
+        block_acct_failed(blk_get_stats(ns->conf.blk), &blk_req->acct);
         NVME_GUEST_ERR(nvme_err_internal_dev_error,
             "block request failed: %s", strerror(-ret));
         req->status = NVME_INTERNAL_DEV_ERROR | NVME_DNR;
@@ -871,6 +874,7 @@
 
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
+    NvmeNamespace *ns = req->ns;
     NvmeBlockBackendRequest *blk_req = nvme_blk_req_get(n, req, NULL);
     if (!blk_req) {
         NVME_GUEST_ERR(nvme_err_internal_dev_error, "nvme_blk_req_get: %s",
@@ -878,9 +882,9 @@
         return NVME_INTERNAL_DEV_ERROR;
     }
 
-    block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, 0,
+    block_acct_start(blk_get_stats(ns->conf.blk), &blk_req->acct, 0,
         BLOCK_ACCT_FLUSH);
-    blk_req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, blk_req);
+    blk_req->aiocb = blk_aio_flush(ns->conf.blk, nvme_rw_cb, blk_req);
 
     QTAILQ_INSERT_TAIL(&req->blk_req_tailq, blk_req, tailq_entry);
@@ -890,6 +894,7 @@
 static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    NvmeNamespace *ns = req->ns;
     NvmeBlockBackendRequest *blk_req;
     const uint8_t lbads = nvme_ns_lbads(req->ns);
     uint64_t slba = le64_to_cpu(rw->slba);
@@ -909,10 +914,10 @@
         return NVME_INTERNAL_DEV_ERROR;
     }
 
-    block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct, 0,
+    block_acct_start(blk_get_stats(ns->conf.blk), &blk_req->acct, 0,
         BLOCK_ACCT_WRITE);
 
-    blk_req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
+    blk_req->aiocb = blk_aio_pwrite_zeroes(ns->conf.blk, offset, count,
         BDRV_REQ_MAY_UNMAP, nvme_rw_cb, blk_req);
 
     QTAILQ_INSERT_TAIL(&req->blk_req_tailq, blk_req, tailq_entry);
@@ -929,7 +934,7 @@ static uint16_t nvme_rw_check_req(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint32_t data_size = req->nlb << nvme_ns_lbads(ns);
 
     if (unlikely((req->slba + req->nlb) > ns->id_ns.nsze)) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), req->is_write ?
+        block_acct_invalid(blk_get_stats(ns->conf.blk), req->is_write ?
             BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
         trace_nvme_err_invalid_lba_range(req->slba, req->nlb, ns->id_ns.nsze);
         return NVME_LBA_RANGE | NVME_DNR;
@@ -950,18 +955,19 @@
 static void nvme_blk_submit_dma(NvmeCtrl *n, NvmeBlockBackendRequest *blk_req,
     BlockCompletionFunc *cb)
 {
     NvmeRequest *req = blk_req->req;
+    NvmeNamespace *ns = req->ns;
 
     if (req->is_write) {
-        dma_acct_start(n->conf.blk, &blk_req->acct, blk_req->qsg,
+        dma_acct_start(ns->conf.blk, &blk_req->acct, blk_req->qsg,
             BLOCK_ACCT_WRITE);
 
-        blk_req->aiocb = dma_blk_write(n->conf.blk, blk_req->qsg,
+        blk_req->aiocb = dma_blk_write(ns->conf.blk, blk_req->qsg,
             blk_req->blk_offset, BDRV_SECTOR_SIZE, cb, blk_req);
     } else {
-        dma_acct_start(n->conf.blk, &blk_req->acct, blk_req->qsg,
+        dma_acct_start(ns->conf.blk, &blk_req->acct, blk_req->qsg,
             BLOCK_ACCT_READ);
 
-        blk_req->aiocb = dma_blk_read(n->conf.blk, blk_req->qsg,
+        blk_req->aiocb = dma_blk_read(ns->conf.blk, blk_req->qsg,
             blk_req->blk_offset, BDRV_SECTOR_SIZE, cb, blk_req);
     }
 }
@@ -970,21 +976,22 @@
 static void nvme_blk_submit_cmb(NvmeCtrl *n, NvmeBlockBackendRequest *blk_req,
     BlockCompletionFunc *cb)
 {
     NvmeRequest *req = blk_req->req;
+    NvmeNamespace *ns = req->ns;
 
     qemu_iovec_init(&blk_req->iov, blk_req->qsg->nsg);
     dma_to_cmb(n, blk_req->qsg, &blk_req->iov);
 
     if (req->is_write) {
-        block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct,
+        block_acct_start(blk_get_stats(ns->conf.blk), &blk_req->acct,
             blk_req->iov.size, BLOCK_ACCT_WRITE);
 
-        blk_req->aiocb = blk_aio_pwritev(n->conf.blk, blk_req->blk_offset,
+        blk_req->aiocb = blk_aio_pwritev(ns->conf.blk, blk_req->blk_offset,
             &blk_req->iov, 0, cb, blk_req);
     } else {
-        block_acct_start(blk_get_stats(n->conf.blk), &blk_req->acct,
+        block_acct_start(blk_get_stats(ns->conf.blk), &blk_req->acct,
             blk_req->iov.size, BLOCK_ACCT_READ);
 
-        blk_req->aiocb = blk_aio_preadv(n->conf.blk, blk_req->blk_offset,
+        blk_req->aiocb = blk_aio_preadv(ns->conf.blk, blk_req->blk_offset,
             &blk_req->iov, 0, cb, blk_req);
     }
 }
@@ -1042,7 +1049,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    req->ns = &n->namespace;
+    req->ns = n->namespaces[nsid - 1];
+
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
         return nvme_flush(n, cmd, req);
@@ -1302,7 +1310,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    ns = &n->namespace;
+    ns = n->namespaces[nsid - 1];
 
     return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd,
         req);
@@ -1444,6 +1452,8 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
 
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
+    NvmeNamespace *ns = req->ns;
+
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t result;
@@ -1464,7 +1474,7 @@
         result = cpu_to_le32(n->features.err_rec);
         break;
     case NVME_VOLATILE_WRITE_CACHE:
-        result = blk_enable_write_cache(n->conf.blk);
+        result = blk_enable_write_cache(ns->conf.blk);
         trace_nvme_getfeat_vwcache(result ?
"enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: @@ -1518,6 +1528,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd, static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { + NvmeNamespace *ns = req->ns; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); uint32_t dw11 = le32_to_cpu(cmd->cdw11); @@ -1532,7 +1544,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) } break; case NVME_VOLATILE_WRITE_CACHE: - blk_set_enable_write_cache(n->conf.blk, dw11 & 1); + blk_set_enable_write_cache(ns->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: if (n->qs_created > 2) { @@ -1835,7 +1847,9 @@ static void nvme_clear_ctrl(NvmeCtrl *n) NvmeAsyncEvent *event; int i; - blk_drain(n->conf.blk); + for (int i = 0; i < n->num_namespaces; i++) { + blk_drain(n->namespaces[i]->conf.blk); + } for (i = 0; i < n->params.num_queues; i++) { if (n->sq[i] != NULL) { @@ -1858,7 +1872,10 @@ static void nvme_clear_ctrl(NvmeCtrl *n) g_free(event); } - blk_flush(n->conf.blk); + for (int i = 0; i < n->num_namespaces; i++) { + blk_flush(n->namespaces[i]->conf.blk); + } + n->bar.cc = 0; n->outstanding_aers = 0; } @@ -2280,8 +2297,8 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp) { NvmeParams *params = &n->params; - if (!n->conf.blk) { - error_setg(errp, "nvme: block backend not configured"); + if (!n->parent_obj.qdev.id) { + error_setg(errp, "nvme: invalid 'id' parameter"); return 1; } @@ -2298,20 +2315,9 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp) return 0; } -static int nvme_init_blk(NvmeCtrl *n, Error **errp) -{ - blkconf_blocksizes(&n->conf); - if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) { - return 1; - } - - return 0; -} - static void nvme_init_state(NvmeCtrl *n) { - n->num_namespaces = 1; + n->num_namespaces = 0; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); @@ -2404,11 +2410,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); - - if (blk_enable_write_cache(n->conf.blk)) { - id->vwc = 1; - } - + id->vwc = 1; id->sgls = cpu_to_le32(0x1); strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:"); @@ -2430,52 +2432,26 @@ static void nvme_init_ctrl(NvmeCtrl *n) n->bar.intmc = n->bar.intms = 0; } -static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns) -{ - return n->ns_size / nvme_ns_lbads_bytes(ns); -} - -static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns) -{ - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(n->ns_size >> - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)].ds); -} - -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) -{ - uint64_t ns_blks; - NvmeIdNs *id_ns = &ns->id_ns; - - nvme_ns_init_identify(n, id_ns); - - ns_blks = nvme_ns_calc_blks(n, ns); - id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns_blks); - - return 0; -} - -static int nvme_init_namespaces(NvmeCtrl *n, Error **errp) +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) { - int64_t bs_size; - Error *local_err = NULL; - NvmeNamespace *ns = &n->namespace; + uint32_t nsid = ns->params.nsid; - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg_errno(errp, -bs_size, 
"blk_getlength"); + if (nsid == 0 || nsid > NVME_MAX_NAMESPACES) { + error_setg(errp, "invalid namespace id"); return 1; } - n->ns_size = bs_size / (uint64_t) n->num_namespaces; - - if (nvme_init_namespace(n, ns, &local_err)) { - error_propagate_prepend(errp, local_err, - "nvme_init_namespace: "); + if (n->namespaces[nsid - 1]) { + error_setg(errp, "namespace ids must be unique"); return 1; } + trace_nvme_register_namespace(nsid); + + n->namespaces[nsid - 1] = ns; + n->num_namespaces++; + n->id_ctrl.nn++; + return 0; } @@ -2489,19 +2465,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - nvme_init_state(n); - - if (nvme_init_blk(n, &local_err)) { - error_propagate_prepend(errp, local_err, "nvme_init_blk: "); - return; - } - - if (nvme_init_namespaces(n, &local_err)) { - error_propagate_prepend(errp, local_err, - "nvme_init_namespaces: "); - return; - } + qbus_create_inplace(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, + &pci_dev->qdev, n->parent_obj.qdev.id); + nvme_init_state(n); nvme_init_pci(n, pci_dev); nvme_init_ctrl(n); } @@ -2524,7 +2491,6 @@ static void nvme_exit(PCIDevice *pci_dev) } static Property nvme_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_NVME_PROPERTIES(NvmeCtrl, params), DEFINE_PROP_END_OF_LIST(), }; @@ -2552,30 +2518,27 @@ static void nvme_class_init(ObjectClass *oc, void *data) dc->vmsd = &nvme_vmstate; } -static void nvme_instance_init(Object *obj) -{ - NvmeCtrl *s = NVME(obj); - - device_add_bootindex_property(obj, &s->conf.bootindex, - "bootindex", "/namespace@1,0", - DEVICE(obj), &error_abort); -} - static const TypeInfo nvme_info = { .name = TYPE_NVME, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(NvmeCtrl), .class_init = nvme_class_init, - .instance_init = nvme_instance_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { } }, }; +static const TypeInfo nvme_bus_info = { + .name = TYPE_NVME_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(NvmeBus), +}; + static void nvme_register_types(void) { type_register_static(&nvme_info); + type_register_static(&nvme_bus_info); } type_init(nvme_register_types) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 1d52b183d263..9aff5c82a51b 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -2,6 +2,9 @@ #define HW_NVME_H #include "block/nvme.h" +#include "nvme-ns.h" + +#define NVME_MAX_NAMESPACES 256 #define DEFINE_NVME_PROPERTIES(_state, _props) \ DEFINE_PROP_STRING("serial", _state, _props.serial), \ @@ -86,9 +89,12 @@ typedef struct NvmeCQueue { QTAILQ_HEAD(, NvmeRequest) req_list; } NvmeCQueue; -typedef struct NvmeNamespace { - NvmeIdNs id_ns; -} NvmeNamespace; +#define TYPE_NVME_BUS "nvme-bus" +#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; #define TYPE_NVME "nvme" #define NVME(obj) \ @@ -99,7 +105,6 @@ typedef struct NvmeCtrl { MemoryRegion iomem; MemoryRegion ctrl_mem; NvmeBar bar; - BlockConf conf; NvmeParams params; uint64_t starttime_ms; @@ -112,7 +117,6 @@ typedef struct NvmeCtrl { uint32_t reg_size; uint32_t num_namespaces; uint32_t max_q_ents; - uint64_t ns_size; uint8_t outstanding_aers; uint32_t cmbsz; uint32_t cmbloc; @@ -128,13 +132,15 @@ typedef struct NvmeCtrl { QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue; NvmeErrorLog *elpes; - NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; NvmeSQueue **sq; NvmeCQueue **cq; NvmeSQueue admin_sq; NvmeCQueue admin_cq; NvmeFeatureVal features; NvmeIdCtrl id_ctrl; + + NvmeBus bus; } NvmeCtrl; static inline 
 static inline bool nvme_rw_is_write(NvmeRequest *req)
@@ -148,15 +154,6 @@ static inline bool nvme_is_error(uint16_t status, uint16_t err)
 {
     return (status & 0xfff) == err;
 }
 
-static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
-{
-    NvmeIdNs *id = &ns->id_ns;
-    return id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].ds;
-}
-
-static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
-{
-    return 1 << nvme_ns_lbads(ns);
-}
+int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
 
 #endif /* HW_NVME_H */

diff --git a/hw/block/trace-events b/hw/block/trace-events
index b239e92294e4..0809c248aa54 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -29,6 +29,7 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans)
 
 # nvme.c
 # nvme traces for successful events
+nvme_register_namespace(uint32_t nsid) "nsid %"PRIu32""
 nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 nvme_irq_pin(void) "pulsing IRQ pin"
 nvme_irq_masked(void) "IRQ is masked"