@@ -35,6 +35,7 @@
* mdts=<N[optional]>,vsl=<N[optional]>, \
* zoned.zasl=<N[optional]>, \
* zoned.auto_transition=<on|off[optional]>, \
+ * sriov_max_vfs=<N[optional]> \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -106,6 +107,12 @@
* transitioned to zone state closed for resource management purposes.
* Defaults to 'on'.
*
+ * - `sriov_max_vfs`
+ * Indicates the maximum number of PCIe virtual functions supported
+ * by the controller. The default value is 0. Specifying a non-zero value
+ * enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
+ * Virtual function controllers will not report SR-IOV capability.
+ *
* nvme namespace device parameters
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - `shared`
@@ -160,6 +167,7 @@
#include "sysemu/block-backend.h"
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
+#include "hw/pci/pcie_sriov.h"
#include "migration/vmstate.h"
#include "nvme.h"
@@ -175,6 +183,9 @@
#define NVME_TEMPERATURE_CRITICAL 0x175
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
+#define NVME_MAX_VFS 127
+#define NVME_VF_OFFSET 0x1
+#define NVME_VF_STRIDE 1
#define NVME_GUEST_ERR(trace, fmt, ...) \
do { \
@@ -5583,6 +5594,10 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
g_free(event);
}
+ if (!pci_is_vf(&n->parent_obj) && n->params.sriov_max_vfs) {
+ pcie_sriov_pf_disable_vfs(&n->parent_obj);
+ }
+
n->aer_queued = 0;
n->outstanding_aers = 0;
n->qs_created = false;
@@ -6264,6 +6279,29 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
error_setg(errp, "vsl must be non-zero");
return;
}
+
+ if (params->sriov_max_vfs) {
+ if (!n->subsys) {
+ error_setg(errp, "subsystem is required for the use of SR-IOV");
+ return;
+ }
+
+ if (params->sriov_max_vfs > NVME_MAX_VFS) {
+ error_setg(errp, "sriov_max_vfs must be between 0 and %d",
+ NVME_MAX_VFS);
+ return;
+ }
+
+ if (params->cmb_size_mb) {
+ error_setg(errp, "CMB is not supported with SR-IOV");
+ return;
+ }
+
+ if (n->pmr.dev) {
+ error_setg(errp, "PMR is not supported with SR-IOV");
+ return;
+ }
+ }
}
static void nvme_init_state(NvmeCtrl *n)
@@ -6321,6 +6359,20 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
memory_region_set_enabled(&n->pmr.dev->mr, false);
}
+static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
+ uint64_t bar_size)
+{
+ uint16_t vf_dev_id = n->params.use_intel_id ?
+ PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
+
+ pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
+ n->params.sriov_max_vfs, n->params.sriov_max_vfs,
+ NVME_VF_OFFSET, NVME_VF_STRIDE);
+
+ pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
+}
+
static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
uint8_t *pci_conf = pci_dev->config;
@@ -6335,7 +6387,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
if (n->params.use_intel_id) {
pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
- pci_config_set_device_id(pci_conf, 0x5845);
+ pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME);
} else {
pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
@@ -6343,6 +6395,9 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
pcie_endpoint_cap_init(pci_dev, 0x80);
+ if (n->params.sriov_max_vfs) {
+ pcie_ari_init(pci_dev, 0x100, 1);
+ }
bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB);
msix_table_offset = bar_size;
@@ -6361,8 +6416,12 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
n->reg_size);
memory_region_add_subregion(&n->bar0, 0, &n->iomem);
- pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ if (pci_is_vf(pci_dev)) {
+ pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
+ } else {
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ }
ret = msix_init(pci_dev, n->params.msix_qsize,
&n->bar0, 0, msix_table_offset,
&n->bar0, 0, msix_pba_offset, 0, &err);
@@ -6383,6 +6442,10 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
nvme_init_pmr(n, pci_dev);
}
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
+ nvme_init_sriov(n, pci_dev, 0x120, bar_size);
+ }
+
return 0;
}
@@ -6532,6 +6595,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
NvmeCtrl *n = NVME(pci_dev);
NvmeNamespace *ns;
Error *local_err = NULL;
+ NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
+
+ if (pci_is_vf(pci_dev)) {
+ /* VFs derive settings from the parent. PF's lifespan exceeds
+ * that of VF's, so it's safe to share params.serial.
+ */
+ memcpy(&n->params, &pn->params, sizeof(NvmeParams));
+ n->subsys = pn->subsys;
+ }
nvme_check_constraints(n, &local_err);
if (local_err) {
@@ -6596,6 +6668,11 @@ static void nvme_exit(PCIDevice *pci_dev)
if (n->pmr.dev) {
host_memory_backend_set_mapped(n->pmr.dev, false);
}
+
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
+ pcie_sriov_pf_exit(pci_dev);
+ }
+
msix_uninit(pci_dev, &n->bar0, &n->bar0);
memory_region_del_subregion(&n->bar0, &n->iomem);
}
@@ -6620,6 +6697,7 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
+ DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -24,7 +24,7 @@
#include "block/nvme.h"
-#define NVME_MAX_CONTROLLERS 32
+#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES 256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
@@ -391,6 +391,7 @@ typedef struct NvmeParams {
uint8_t zasl;
bool auto_transition_zones;
bool legacy_cmb;
+ uint8_t sriov_max_vfs;
} NvmeParams;
typedef struct NvmeCtrl {
@@ -237,6 +237,7 @@
#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
#define PCI_DEVICE_ID_INTEL_82801D 0x24CD
#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
+#define PCI_DEVICE_ID_INTEL_NVME 0x5845
#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020