diff mbox series

[RFC,19/29] vfio/vgpu_mgr: introduce vGPU lifecycle management prelude

Message ID 20240922124951.1946072-20-zhiw@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Introduce NVIDIA GPU Virtualization (vGPU) Support | expand

Commit Message

Zhi Wang Sept. 22, 2024, 12:49 p.m. UTC
The following patches introduce the routines used when creating a vGPU
one by one. As a first step, introduce the prelude of the vGPU lifecycle
management as the skeleton they build on.

Introduce the NVIDIA vGPU manager core module, which hosts the vGPU
lifecycle management data structures and routines.

Cc: Neo Jia <cjia@nvidia.com>
Cc: Surath Mitra <smitra@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/Kconfig                |  2 +
 drivers/vfio/pci/Makefile               |  2 +
 drivers/vfio/pci/nvidia-vgpu/Kconfig    | 13 ++++
 drivers/vfio/pci/nvidia-vgpu/Makefile   |  3 +
 drivers/vfio/pci/nvidia-vgpu/nvkm.h     | 46 ++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu.c     | 83 +++++++++++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c | 99 +++++++++++++++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h | 43 +++++++++++
 8 files changed, 291 insertions(+)
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/Kconfig
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/Makefile
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/nvkm.h
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vgpu.c
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
diff mbox series

Patch

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 15821a2d77d2..4b42378afc1a 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -69,4 +69,6 @@  source "drivers/vfio/pci/virtio/Kconfig"
 
 source "drivers/vfio/pci/nvgrace-gpu/Kconfig"
 
+source "drivers/vfio/pci/nvidia-vgpu/Kconfig"
+
 endmenu
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index ce7a61f1d912..88f722c5c161 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -17,3 +17,5 @@  obj-$(CONFIG_PDS_VFIO_PCI) += pds/
 obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/
 
 obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/
+
+obj-$(CONFIG_NVIDIA_VGPU_VFIO_PCI) += nvidia-vgpu/
diff --git a/drivers/vfio/pci/nvidia-vgpu/Kconfig b/drivers/vfio/pci/nvidia-vgpu/Kconfig
new file mode 100644
index 000000000000..a9b28e944902
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/Kconfig
@@ -0,0 +1,13 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+config NVIDIA_VGPU_MGR
+	tristate
+
+config NVIDIA_VGPU_VFIO_PCI
+	tristate "VFIO support for the NVIDIA vGPU"
+	select NVIDIA_VGPU_MGR
+	select VFIO_PCI_CORE
+	help
+	  VFIO support for the NVIDIA vGPU is required to assign the vGPU
+	  to userspace using KVM/qemu/etc.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/nvidia-vgpu/Makefile b/drivers/vfio/pci/nvidia-vgpu/Makefile
new file mode 100644
index 000000000000..1d2c0eb1fa5c
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_NVIDIA_VGPU_MGR) += nvidia-vgpu-mgr.o
+nvidia-vgpu-mgr-y := vgpu_mgr.o vgpu.o
diff --git a/drivers/vfio/pci/nvidia-vgpu/nvkm.h b/drivers/vfio/pci/nvidia-vgpu/nvkm.h
new file mode 100644
index 000000000000..4c75431ee1f6
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/nvkm.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+#ifndef __NVIDIA_VGPU_MGR_NVKM_H__
+#define __NVIDIA_VGPU_MGR_NVKM_H__
+
+#include <linux/pci.h>
+#include <drm/nvkm_vgpu_mgr_vfio.h>
+
+/*
+ * Handle through which the vGPU manager talks to the PF driver.
+ * Filled in by nvidia_vgpu_mgr_get_handle().
+ */
+struct nvidia_vgpu_mgr_handle {
+	/* pci_get_drvdata() of the PF; passed as first argument to every op. */
+	void *pf_drvdata;
+	/* Ops table obtained from nvkm_vgpu_mgr_get_vfio_ops(). */
+	struct nvkm_vgpu_mgr_vfio_ops *ops;
+	/* Handle data written by ops->get_handle() and given to attach_handle(). */
+	struct nvidia_vgpu_vfio_handle_data data;
+};
+
+/**
+ * nvidia_vgpu_mgr_get_handle - fill in the PF driver handle for a VF
+ * @pdev: the VF pci_dev.
+ * @h: the handle to fill in.
+ *
+ * Resolves the PF of @pdev, verifies that it is bound to the "nvkm"
+ * driver and then populates @h with the PF driver data, its VFIO ops
+ * and the handle data reported by ops->get_handle().
+ *
+ * Returns: 0 on success, -EINVAL if @pdev is not a VF or its PF is not
+ * bound to the "nvkm" driver.
+ */
+static inline int nvidia_vgpu_mgr_get_handle(struct pci_dev *pdev,
+		struct nvidia_vgpu_mgr_handle *h)
+{
+	struct pci_dev *pf_dev;
+
+	if (!pdev->is_virtfn)
+		return -EINVAL;
+
+	pf_dev = pdev->physfn;
+
+	/*
+	 * A PF without a bound driver has a NULL ->driver; reject it here
+	 * instead of dereferencing it in the name comparison below.
+	 */
+	if (!pf_dev || !pf_dev->driver)
+		return -EINVAL;
+
+	if (strcmp(pf_dev->driver->name, "nvkm"))
+		return -EINVAL;
+
+	h->pf_drvdata = pci_get_drvdata(pf_dev);
+	h->ops = nvkm_vgpu_mgr_get_vfio_ops(h->pf_drvdata);
+	h->ops->get_handle(h->pf_drvdata, &h->data);
+
+	return 0;
+}
+
+/*
+ * Ask the PF driver whether vGPU manager support is enabled.
+ * NOTE: unlike the two macros below, @h is the handle itself (a struct
+ * lvalue), not a pointer to it.
+ */
+#define nvidia_vgpu_mgr_support_is_enabled(h) \
+	(h).ops->vgpu_mgr_is_enabled((h).pf_drvdata)
+
+/* Call ops->attach_handle() with the handle data; @h is a pointer to the handle. */
+#define nvidia_vgpu_mgr_attach_handle(h) \
+	(h)->ops->attach_handle((h)->pf_drvdata, &(h)->data)
+
+/* Call ops->detach_handle() on the PF driver data; @h is a pointer to the handle. */
+#define nvidia_vgpu_mgr_detach_handle(h) \
+	(h)->ops->detach_handle((h)->pf_drvdata)
+
+#endif
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu.c b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
new file mode 100644
index 000000000000..34f6adb9dfe4
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
@@ -0,0 +1,83 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include "vgpu_mgr.h"
+
+/*
+ * Clear the manager slot selected by vgpu->info.id and drop the vGPU
+ * count, both under the manager's vgpu_id_lock.
+ */
+static void unregister_vgpu(struct nvidia_vgpu *vgpu)
+{
+	struct nvidia_vgpu_mgr *mgr = vgpu->vgpu_mgr;
+	struct nvidia_vgpu **slot;
+
+	mutex_lock(&mgr->vgpu_id_lock);
+	slot = &mgr->vgpus[vgpu->info.id];
+	*slot = NULL;
+	atomic_dec(&mgr->num_vgpus);
+	mutex_unlock(&mgr->vgpu_id_lock);
+}
+
+/*
+ * Claim the manager slot selected by vgpu->info.id for @vgpu and bump the
+ * vGPU count, both under the manager's vgpu_id_lock.
+ *
+ * Returns 0 on success, -EBUSY if the slot is already occupied.
+ */
+static int register_vgpu(struct nvidia_vgpu *vgpu)
+{
+	struct nvidia_vgpu_mgr *mgr = vgpu->vgpu_mgr;
+	struct nvidia_vgpu **slot;
+	int ret = 0;
+
+	mutex_lock(&mgr->vgpu_id_lock);
+	slot = &mgr->vgpus[vgpu->info.id];
+	if (*slot) {
+		ret = -EBUSY;
+	} else {
+		*slot = vgpu;
+		atomic_inc(&mgr->num_vgpus);
+	}
+	mutex_unlock(&mgr->vgpu_id_lock);
+
+	return ret;
+}
+
+/**
+ * nvidia_vgpu_mgr_destroy_vgpu - destroy a vGPU instance
+ * @vgpu: the vGPU instance to destroy.
+ *
+ * Atomically switches the vGPU status from 1 to 0 and then removes the
+ * vGPU from its manager.
+ *
+ * Returns: 0 on success, -ENODEV if the previous status was 0.
+ */
+int nvidia_vgpu_mgr_destroy_vgpu(struct nvidia_vgpu *vgpu)
+{
+	int old_status = atomic_cmpxchg(&vgpu->status, 1, 0);
+
+	if (old_status == 0)
+		return -ENODEV;
+
+	unregister_vgpu(vgpu);
+
+	return 0;
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_destroy_vgpu);
+
+/**
+ * nvidia_vgpu_mgr_create_vgpu - create a vGPU instance
+ * @vgpu: the vGPU instance going to be created.
+ * @vgpu_type: the vGPU type of the vGPU instance.
+ *
+ * The caller must initialize vgpu->vgpu_mgr, vgpu->info, vgpu->pdev.
+ * vgpu->info.id must be in the range [0, NVIDIA_MAX_VGPUS), and
+ * vgpu->info.gfid and vgpu->info.dbdf must be non-zero.
+ *
+ * Returns: 0 on success, -EINVAL on invalid parameters, -EBUSY if the
+ * vGPU id is already in use.
+ */
+int nvidia_vgpu_mgr_create_vgpu(struct nvidia_vgpu *vgpu, u8 *vgpu_type)
+{
+	int ret;
+
+	/*
+	 * info.id is a signed int used to index vgpu_mgr->vgpus[], so both
+	 * bounds have to be checked.
+	 */
+	if (WARN_ON(vgpu->info.id < 0 || vgpu->info.id >= NVIDIA_MAX_VGPUS))
+		return -EINVAL;
+
+	if (WARN_ON(!vgpu->vgpu_mgr || !vgpu->info.gfid || !vgpu->info.dbdf))
+		return -EINVAL;
+
+	mutex_init(&vgpu->lock);
+	vgpu->vgpu_type = vgpu_type;
+
+	ret = register_vgpu(vgpu);
+	if (ret)
+		return ret;
+
+	/* Status 1 is what nvidia_vgpu_mgr_destroy_vgpu() expects to find. */
+	atomic_set(&vgpu->status, 1);
+
+	return 0;
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_create_vgpu);
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
new file mode 100644
index 000000000000..dc2a73f95650
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.c
@@ -0,0 +1,99 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include "vgpu_mgr.h"
+
+/*
+ * Held by nvidia_vgpu_mgr_get() across the handle lookup, allocation and
+ * attach, and by nvidia_vgpu_mgr_put() around kref_put().
+ */
+DEFINE_MUTEX(vgpu_mgr_attach_lock);
+
+/*
+ * kref release callback for the vGPU manager: detach its handle from the
+ * PF driver, then free the manager.
+ */
+static void vgpu_mgr_release(struct kref *kref)
+{
+	struct nvidia_vgpu_mgr *mgr;
+
+	mgr = container_of(kref, struct nvidia_vgpu_mgr, refcount);
+
+	nvidia_vgpu_mgr_detach_handle(&mgr->handle);
+	kvfree(mgr);
+}
+
+/**
+ * nvidia_vgpu_mgr_put - put the vGPU manager
+ * @vgpu_mgr: the vGPU manager to put.
+ *
+ * Drops one reference on @vgpu_mgr under vgpu_mgr_attach_lock. Nothing is
+ * done when the PF driver reports that vGPU manager support is not
+ * enabled.
+ */
+void nvidia_vgpu_mgr_put(struct nvidia_vgpu_mgr *vgpu_mgr)
+{
+	if (nvidia_vgpu_mgr_support_is_enabled(vgpu_mgr->handle)) {
+		mutex_lock(&vgpu_mgr_attach_lock);
+		kref_put(&vgpu_mgr->refcount, vgpu_mgr_release);
+		mutex_unlock(&vgpu_mgr_attach_lock);
+	}
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_put);
+
+/**
+ * nvidia_vgpu_mgr_get - get the vGPU manager
+ * @dev: the VF pci_dev.
+ *
+ * Looks up the PF driver handle for @dev. If the handle data already
+ * carries a vGPU manager, a reference is taken on it and it is returned.
+ * Otherwise a new manager is allocated, attached to the PF driver and
+ * returned holding its initial reference.
+ *
+ * Returns: pointer to vgpu_mgr on success, ERR_PTR() on failure.
+ */
+struct nvidia_vgpu_mgr *nvidia_vgpu_mgr_get(struct pci_dev *dev)
+{
+	struct nvidia_vgpu_mgr *vgpu_mgr;
+	struct nvidia_vgpu_mgr_handle handle;
+	int ret;
+
+	mutex_lock(&vgpu_mgr_attach_lock);
+
+	memset(&handle, 0, sizeof(handle));
+
+	ret = nvidia_vgpu_mgr_get_handle(dev, &handle);
+	if (ret)
+		goto fail;
+
+	if (!nvidia_vgpu_mgr_support_is_enabled(handle)) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	/* A manager is already attached: share it. */
+	if (handle.data.priv) {
+		vgpu_mgr = handle.data.priv;
+		kref_get(&vgpu_mgr->refcount);
+		goto out_unlock;
+	}
+
+	vgpu_mgr = kvzalloc(sizeof(*vgpu_mgr), GFP_KERNEL);
+	if (!vgpu_mgr) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	vgpu_mgr->handle = handle;
+	vgpu_mgr->handle.data.priv = vgpu_mgr;
+
+	/*
+	 * Attach the copy stored in the manager: it is the one whose
+	 * data.priv points at the new manager. The on-stack handle still
+	 * has a NULL data.priv here, so attaching it would not let a later
+	 * lookup find this manager.
+	 * NOTE(review): assumes attach_handle() records data->priv for
+	 * subsequent get_handle() calls - confirm against the PF driver.
+	 */
+	ret = nvidia_vgpu_mgr_attach_handle(&vgpu_mgr->handle);
+	if (ret)
+		goto fail_attach_handle;
+
+	kref_init(&vgpu_mgr->refcount);
+	mutex_init(&vgpu_mgr->vgpu_id_lock);
+
+out_unlock:
+	mutex_unlock(&vgpu_mgr_attach_lock);
+	return vgpu_mgr;
+
+fail_attach_handle:
+	kvfree(vgpu_mgr);
+fail:
+	mutex_unlock(&vgpu_mgr_attach_lock);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_get);
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Zhi Wang <zhiw@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA VGPU manager - core module to support VFIO PCI driver for NVIDIA vGPU");
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
new file mode 100644
index 000000000000..2efd96644098
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
@@ -0,0 +1,43 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+#ifndef __NVIDIA_VGPU_MGR_H__
+#define __NVIDIA_VGPU_MGR_H__
+
+#include "nvkm.h"
+
+/* Size of nvidia_vgpu_mgr.vgpus[]; vGPU ids must be below this value. */
+#define NVIDIA_MAX_VGPUS 2
+
+/*
+ * Caller-provided identity of a vGPU; must be filled in before
+ * nvidia_vgpu_mgr_create_vgpu() is called.
+ */
+struct nvidia_vgpu_info {
+	/* Index into nvidia_vgpu_mgr.vgpus[]; must be below NVIDIA_MAX_VGPUS. */
+	int id;
+	/* Must be non-zero, otherwise creation is rejected. */
+	u32 gfid;
+	/* Must be non-zero, otherwise creation is rejected. */
+	u32 dbdf;
+};
+
+/* A single vGPU instance managed by a struct nvidia_vgpu_mgr. */
+struct nvidia_vgpu {
+	/* Initialized by nvidia_vgpu_mgr_create_vgpu(). */
+	struct mutex lock;
+	/* Set to 1 once created; destroy switches it from 1 back to 0. */
+	atomic_t status;
+	/* Set by the caller before nvidia_vgpu_mgr_create_vgpu(). */
+	struct pci_dev *pdev;
+
+	/* The vgpu_type pointer passed to nvidia_vgpu_mgr_create_vgpu(). */
+	u8 *vgpu_type;
+	/* Set by the caller before nvidia_vgpu_mgr_create_vgpu(). */
+	struct nvidia_vgpu_info info;
+	/* Manager this vGPU registers with; set by the caller, must not be NULL. */
+	struct nvidia_vgpu_mgr *vgpu_mgr;
+};
+
+/*
+ * vGPU manager, obtained with nvidia_vgpu_mgr_get() and released with
+ * nvidia_vgpu_mgr_put().
+ */
+struct nvidia_vgpu_mgr {
+	/* Taken by nvidia_vgpu_mgr_get(), dropped by nvidia_vgpu_mgr_put(). */
+	struct kref refcount;
+	/* PF driver handle; its data.priv points back at this manager. */
+	struct nvidia_vgpu_mgr_handle handle;
+
+	/* Held while vgpus[] and num_vgpus are updated. */
+	struct mutex vgpu_id_lock;
+	/* Registered vGPUs, indexed by nvidia_vgpu_info.id; NULL when free. */
+	struct nvidia_vgpu *vgpus[NVIDIA_MAX_VGPUS];
+	/* Number of vGPUs currently registered in vgpus[]. */
+	atomic_t num_vgpus;
+};
+
+struct nvidia_vgpu_mgr *nvidia_vgpu_mgr_get(struct pci_dev *dev);
+void nvidia_vgpu_mgr_put(struct nvidia_vgpu_mgr *vgpu_mgr);
+
+int nvidia_vgpu_mgr_destroy_vgpu(struct nvidia_vgpu *vgpu);
+int nvidia_vgpu_mgr_create_vgpu(struct nvidia_vgpu *vgpu, u8 *vgpu_type);
+
+#endif