
[RFC,29/29] vfio/vgpu_mgr: introduce NVIDIA vGPU VFIO variant driver

Message ID: 20240922124951.1946072-30-zhiw@nvidia.com
Series: Introduce NVIDIA GPU Virtualization (vGPU) Support

Commit Message

Zhi Wang Sept. 22, 2024, 12:49 p.m. UTC
A VFIO variant driver module extends the capabilities of the core
VFIO (Virtual Function I/O) framework, offering device management
interfaces to userspace along with advanced feature support.

For userspace to use an NVIDIA vGPU, a new vGPU VFIO variant driver is
introduced to provide vGPU management, such as selecting and creating a
vGPU instance, and to support advanced features such as live migration.

Introduce the NVIDIA vGPU VFIO variant driver to support the vGPU
lifecycle management UABI and future advanced features.

Cc: Neo Jia <cjia@nvidia.com>
Cc: Surath Mitra <smitra@nvidia.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Vinay Kabra <vkabra@nvidia.com>
Cc: Ankit Agrawal <ankita@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/nvidia-vgpu/Makefile      |   3 +
 drivers/vfio/pci/nvidia-vgpu/vfio.h        |  43 ++
 drivers/vfio/pci/nvidia-vgpu/vfio_access.c | 297 ++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vfio_main.c   | 511 +++++++++++++++++++++
 drivers/vfio/pci/nvidia-vgpu/vgpu.c        |  22 +
 drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h    |   2 +-
 6 files changed, 877 insertions(+), 1 deletion(-)
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio.h
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio_access.c
 create mode 100644 drivers/vfio/pci/nvidia-vgpu/vfio_main.c

Patch

diff --git a/drivers/vfio/pci/nvidia-vgpu/Makefile b/drivers/vfio/pci/nvidia-vgpu/Makefile
index fade9d49df97..99c47e2f436d 100644
--- a/drivers/vfio/pci/nvidia-vgpu/Makefile
+++ b/drivers/vfio/pci/nvidia-vgpu/Makefile
@@ -3,3 +3,6 @@  ccflags-y += -I$(srctree)/$(src)/include
 
 obj-$(CONFIG_NVIDIA_VGPU_MGR) += nvidia-vgpu-mgr.o
 nvidia-vgpu-mgr-y := vgpu_mgr.o vgpu.o vgpu_types.o rpc.o
+
+obj-$(CONFIG_NVIDIA_VGPU_VFIO_PCI) += nvidia-vgpu-vfio-pci.o
+nvidia-vgpu-vfio-pci-y := vfio_main.o vfio_access.o
diff --git a/drivers/vfio/pci/nvidia-vgpu/vfio.h b/drivers/vfio/pci/nvidia-vgpu/vfio.h
new file mode 100644
index 000000000000..fa6bbf81552d
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vfio.h
@@ -0,0 +1,43 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#ifndef __NVIDIA_VGPU_VFIO_H__
+#define __NVIDIA_VGPU_VFIO_H__
+
+#include <linux/vfio_pci_core.h>
+
+#include <nvrm/nvtypes.h>
+#include <nvrm/common/sdk/nvidia/inc/ctrl/ctrla081.h>
+#include <nvrm/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080vgpumgrinternal.h>
+
+#include "vgpu_mgr.h"
+
+#define VGPU_CONFIG_PARAMS_MAX_LENGTH 1024
+#define DEVICE_CLASS_LENGTH 5
+#define PCI_CONFIG_SPACE_LENGTH 4096
+
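+/*
+ * Location and size of the MSI-X capability in the virtual config space.
+ * Accesses to this range are forwarded to the physical VF (see
+ * pci_config_rw()).
+ */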
+#define CAP_LIST_NEXT_PTR_MSIX 0x7c
+#define MSIX_CAP_SIZE   0xc
+
+struct nvidia_vgpu_vfio {
+	struct vfio_pci_core_device core_dev;
+	u8 config_space[PCI_CONFIG_SPACE_LENGTH];
+
+	void __iomem *bar0_map;
+
+	u8 **vgpu_types;
+	NVA081_CTRL_VGPU_INFO *curr_vgpu_type;
+	u32 num_vgpu_types;
+
+	struct nvidia_vgpu_mgr *vgpu_mgr;
+	struct nvidia_vgpu *vgpu;
+};
+
+void nvidia_vgpu_vfio_setup_config(struct nvidia_vgpu_vfio *nvdev);
+ssize_t nvidia_vgpu_vfio_access(struct nvidia_vgpu_vfio *nvdev,
+				char *buf, size_t count,
+				loff_t ppos, bool iswrite);
+
+#endif /* __NVIDIA_VGPU_VFIO_H__ */
diff --git a/drivers/vfio/pci/nvidia-vgpu/vfio_access.c b/drivers/vfio/pci/nvidia-vgpu/vfio_access.c
new file mode 100644
index 000000000000..320c72a07dbe
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vfio_access.c
@@ -0,0 +1,297 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include "vfio.h"
+
+void nvidia_vgpu_vfio_setup_config(struct nvidia_vgpu_vfio *nvdev)
+{
+	u8 *buffer = NULL;
+
+	memset(nvdev->config_space, 0, sizeof(nvdev->config_space));
+
+	/* Header type 0 (normal devices) */
+	*(u16 *)&nvdev->config_space[PCI_VENDOR_ID] = 0x10de;
+	*(u16 *)&nvdev->config_space[PCI_DEVICE_ID] =
+		FIELD_GET(GENMASK(31, 16), nvdev->curr_vgpu_type->vdevId);
+	*(u16 *)&nvdev->config_space[PCI_COMMAND] = 0x0000;
+	*(u16 *)&nvdev->config_space[PCI_STATUS] = 0x0010;
+
+	buffer = &nvdev->config_space[PCI_CLASS_REVISION];
+	pci_read_config_byte(nvdev->core_dev.pdev, PCI_CLASS_REVISION, buffer);
+
+	nvdev->config_space[PCI_CLASS_PROG] = 0; /* VGA-compatible */
+	nvdev->config_space[PCI_CLASS_DEVICE] = 0; /* VGA controller */
+	nvdev->config_space[PCI_CLASS_DEVICE + 1] = 3; /* display controller */
+
+	/* BAR0: 32-bit */
+	*(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_0] = 0x00000000;
+	/* BAR1: 64-bit, prefetchable */
+	*(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_1] = 0x0000000c;
+	/* BAR2: 64-bit, prefetchable */
+	*(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_3] = 0x0000000c;
+	/* Disable BAR3: I/O */
+	*(u32 *)&nvdev->config_space[PCI_BASE_ADDRESS_5] = 0x00000000;
+
+	*(u16 *)&nvdev->config_space[PCI_SUBSYSTEM_VENDOR_ID] = 0x10de;
+	*(u16 *)&nvdev->config_space[PCI_SUBSYSTEM_ID] =
+		FIELD_GET(GENMASK(15, 0), nvdev->curr_vgpu_type->vdevId);
+
+	nvdev->config_space[PCI_CAPABILITY_LIST] = CAP_LIST_NEXT_PTR_MSIX;
+	nvdev->config_space[CAP_LIST_NEXT_PTR_MSIX + 1] = 0x0;
+
+	/* INTx disabled */
+	nvdev->config_space[0x3d] = 0;
+}
+
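+/*
+ * Pass-through helpers for the physical VF's config space. The read()/
+ * write() paths only generate naturally aligned 1-, 2- and 4-byte
+ * accesses.
+ */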
+static void read_hw_pci_config(struct pci_dev *pdev, char *buf,
+			       size_t count, loff_t offset)
+{
+	switch (count) {
+	case 4:
+		pci_read_config_dword(pdev, offset, (u32 *)buf);
+		break;
+
+	case 2:
+		pci_read_config_word(pdev, offset, (u16 *)buf);
+		break;
+
+	case 1:
+		pci_read_config_byte(pdev, offset, (u8 *)buf);
+		break;
+	default:
+		WARN_ONCE(1, "Unsupported access length\n");
+		break;
+	}
+}
+
+static void write_hw_pci_config(struct pci_dev *pdev, char *buf,
+				size_t count, loff_t offset)
+{
+	switch (count) {
+	case 4:
+		pci_write_config_dword(pdev, offset, *(u32 *)buf);
+		break;
+
+	case 2:
+		pci_write_config_word(pdev, offset, *(u16 *)buf);
+		break;
+
+	case 1:
+		pci_write_config_byte(pdev, offset, *(u8 *)buf);
+		break;
+	default:
+		WARN_ONCE(1, "Unsupported access length\n");
+		break;
+	}
+}
+
+static void hw_pci_config_rw(struct pci_dev *pdev, char *buf,
+			     size_t count, loff_t offset,
+			     bool is_write)
+{
+	is_write ? write_hw_pci_config(pdev, buf, count, offset) :
+		   read_hw_pci_config(pdev, buf, count, offset);
+}
+
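+/*
+ * Forward BAR0 MMIO accesses to the physical VF. The BAR is claimed and
+ * ioremap()ed lazily on first access, and unmapped in close_device().
+ */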
+static ssize_t bar0_rw(struct nvidia_vgpu_vfio *nvdev, char *buf,
+		       size_t count, loff_t ppos, bool iswrite)
+{
+	struct pci_dev *pdev = nvdev->core_dev.pdev;
+	int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+	loff_t offset = ppos;
+	void __iomem *map;
+	u32 val = 0;
+	int ret;
+
+	if (index != VFIO_PCI_BAR0_REGION_INDEX)
+		return -EINVAL;
+
+	offset &= VFIO_PCI_OFFSET_MASK;
+
+	if (nvdev->bar0_map == NULL) {
+		ret = pci_request_selected_regions(pdev, 1 << index, "nvidia-vgpu-vfio");
+		if (ret)
+			return ret;
+
+		if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM)) {
+			pci_release_selected_regions(pdev, 1 << index);
+			return -EIO;
+		}
+
+		map = ioremap(pci_resource_start(pdev, index), pci_resource_len(pdev, index));
+		if (!map) {
+			pci_err(pdev, "Can't map BAR0 MMIO space\n");
+			pci_release_selected_regions(pdev, 1 << index);
+			return -ENOMEM;
+		}
+		nvdev->bar0_map = map;
+	} else {
+		map = nvdev->bar0_map;
+	}
+
+	if (!iswrite) {
+		switch (count) {
+		case 4:
+			val = ioread32(map + offset);
+			break;
+		case 2:
+			val = ioread16(map + offset);
+			break;
+		case 1:
+			val = ioread8(map + offset);
+			break;
+		}
+		memcpy(buf, (u8 *)&val, count);
+	} else {
+		switch (count) {
+		case 4:
+			iowrite32(*(u32 *)buf, map + offset);
+			break;
+		case 2:
+			iowrite16(*(u16 *)buf, map + offset);
+			break;
+		case 1:
+			iowrite8(*(u8 *)buf, map + offset);
+			break;
+		}
+	}
+	return count;
+}
+
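+/*
+ * Virtualize PCI config space. The MSI-X capability range is forwarded
+ * to the hardware; COMMAND/STATUS reads merge live hardware bits into
+ * the shadow copy; BAR writes emulate register sizing against the vGPU
+ * type's BAR lengths.
+ */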
+static ssize_t pci_config_rw(struct nvidia_vgpu_vfio *nvdev, char *buf,
+			     size_t count, loff_t ppos, bool iswrite)
+{
+	struct pci_dev *pdev = nvdev->core_dev.pdev;
+	int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+	loff_t offset = ppos;
+	u32 bar_mask, cfg_addr;
+	u32 val = 0;
+
+	if (index != VFIO_PCI_CONFIG_REGION_INDEX)
+		return -EINVAL;
+
+	offset &= VFIO_PCI_OFFSET_MASK;
+
+	if ((offset >= CAP_LIST_NEXT_PTR_MSIX) && (offset <
+				(CAP_LIST_NEXT_PTR_MSIX + MSIX_CAP_SIZE))) {
+		hw_pci_config_rw(pdev, buf, count, offset, iswrite);
+		return count;
+	}
+
+	if (!iswrite) {
+		memcpy(buf, (u8 *)&nvdev->config_space[offset], count);
+
+		switch (offset) {
+		case PCI_COMMAND:
+			hw_pci_config_rw(pdev, (char *)&val, count, offset, iswrite);
+
+			switch (count) {
+			case 4:
+				val = (u32)(val & 0xFFFF0000) | (val &
+					(PCI_COMMAND_PARITY | PCI_COMMAND_SERR));
+				break;
+			case 2:
+				val = (val & (PCI_COMMAND_PARITY | PCI_COMMAND_SERR));
+				break;
+			default:
+				WARN_ONCE(1, "Unsupported access length\n");
+				break;
+			}
+			break;
+		case PCI_STATUS:
+			hw_pci_config_rw(pdev, (char *)&val, count, offset, iswrite);
+			break;
+
+		default:
+			break;
+		}
+		*(u32 *)buf = *(u32 *)buf | val;
+	} else {
+		switch (offset) {
+		case PCI_VENDOR_ID:
+		case PCI_DEVICE_ID:
+		case PCI_CAPABILITY_LIST:
+			break;
+
+		case PCI_STATUS:
+			hw_pci_config_rw(pdev, buf, count, offset, iswrite);
+			break;
+
+		case PCI_COMMAND:
+			if (count == 4) {
+				val = (u32)((*(u32 *)buf & 0xFFFF0000) >> 16);
+				hw_pci_config_rw(pdev, (char *)&val, 2, PCI_STATUS, iswrite);
+
+				val = (u32)(*(u32 *)buf & 0x0000FFFF);
+				*(u32 *)buf = val;
+			}
+
+			memcpy((u8 *)&nvdev->config_space[offset], buf, count);
+			break;
+
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+		case PCI_BASE_ADDRESS_4:
+			cfg_addr = *(u32 *)buf;
+
+			switch (offset) {
+			case PCI_BASE_ADDRESS_0:
+				bar_mask = (u32)((~(pci_resource_len(pdev, VFIO_PCI_BAR0_REGION_INDEX)) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) | (nvdev->config_space[offset] & 0xFul);
+				break;
+			case PCI_BASE_ADDRESS_1:
+				bar_mask = (u32)((~(nvdev->curr_vgpu_type->bar1Length * 1024 * 1024) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) | (nvdev->config_space[offset] & 0xFul);
+				break;
+
+			case PCI_BASE_ADDRESS_2:
+				bar_mask = (u32)(((~(nvdev->curr_vgpu_type->bar1Length * 1024 * 1024) + 1) & ~0xFul) >> 32);
+				cfg_addr = (cfg_addr & bar_mask);
+				break;
+
+			case PCI_BASE_ADDRESS_3:
+				bar_mask = (u32)((~(pci_resource_len(pdev, VFIO_PCI_BAR3_REGION_INDEX)) + 1) & ~0xFul);
+				cfg_addr = (cfg_addr & bar_mask) | (nvdev->config_space[offset] & 0xFul);
+				break;
+
+			case PCI_BASE_ADDRESS_4:
+				bar_mask = (u32)(((~(pci_resource_len(pdev, VFIO_PCI_BAR3_REGION_INDEX)) + 1) & ~0xFul) >> 32);
+				cfg_addr = (cfg_addr & bar_mask);
+				break;
+			}
+			*(u32 *)&nvdev->config_space[offset] = cfg_addr;
+			break;
+		default:
+			break;
+
+		}
+	}
+	return count;
+}
+
+ssize_t nvidia_vgpu_vfio_access(struct nvidia_vgpu_vfio *nvdev, char *buf,
+				size_t count, loff_t ppos, bool iswrite)
+{
+	int index = VFIO_PCI_OFFSET_TO_INDEX(ppos);
+
+	if (index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	switch (index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		return pci_config_rw(nvdev, buf, count, ppos,
+				     iswrite);
+	case VFIO_PCI_BAR0_REGION_INDEX:
+		return bar0_rw(nvdev, buf, count, ppos, iswrite);
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/drivers/vfio/pci/nvidia-vgpu/vfio_main.c b/drivers/vfio/pci/nvidia-vgpu/vfio_main.c
new file mode 100644
index 000000000000..667ed6fb48f6
--- /dev/null
+++ b/drivers/vfio/pci/nvidia-vgpu/vfio_main.c
@@ -0,0 +1,511 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright © 2024 NVIDIA Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/types.h>
+
+#include "vfio.h"
+
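+/* GFIDs appear to be the 1-based VF index, with GFID 0 denoting the PF. */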
+static int pdev_to_gfid(struct pci_dev *pdev)
+{
+	return pci_iov_vf_id(pdev) + 1;
+}
+
+static int destroy_vgpu(struct nvidia_vgpu_vfio *nvdev)
+{
+	int ret;
+
+	ret = nvidia_vgpu_mgr_destroy_vgpu(nvdev->vgpu);
+	if (ret)
+		return ret;
+
+	kfree(nvdev->vgpu);
+	nvdev->vgpu = NULL;
+	return 0;
+}
+
+static int create_vgpu(struct nvidia_vgpu_vfio *nvdev)
+{
+	struct nvidia_vgpu_mgr *vgpu_mgr = nvdev->vgpu_mgr;
+	struct pci_dev *pdev = nvdev->core_dev.pdev;
+	struct nvidia_vgpu *vgpu;
+	int ret;
+
+	vgpu = kzalloc(sizeof(*vgpu), GFP_KERNEL);
+	if (!vgpu)
+		return -ENOMEM;
+
+	vgpu->info.id = pci_iov_vf_id(pdev);
+	vgpu->info.dbdf = (0 << 16) | pci_dev_id(pdev);
+	vgpu->info.gfid = pdev_to_gfid(pdev);
+
+	vgpu->vgpu_mgr = vgpu_mgr;
+	vgpu->pdev = pdev;
+
+	ret = nvidia_vgpu_mgr_create_vgpu(vgpu,
+			(u8 *)nvdev->curr_vgpu_type);
+	if (ret) {
+		kfree(vgpu);
+		return ret;
+	}
+
+	nvdev->vgpu = vgpu;
+	return 0;
+}
+
+static inline struct vfio_pci_core_device *
+vdev_to_core_dev(struct vfio_device *vdev)
+{
+	return container_of(vdev, struct vfio_pci_core_device, vdev);
+}
+
+static inline struct nvidia_vgpu_vfio *
+core_dev_to_nvdev(struct vfio_pci_core_device *core_dev)
+{
+	return container_of(core_dev, struct nvidia_vgpu_vfio, core_dev);
+}
+
+static void detach_vgpu_mgr(struct nvidia_vgpu_vfio *nvdev)
+{
+	nvidia_vgpu_mgr_put(nvdev->vgpu_mgr);
+
+	nvdev->vgpu_mgr = NULL;
+	nvdev->vgpu_types = NULL;
+	nvdev->num_vgpu_types = 0;
+}
+
+static int attach_vgpu_mgr(struct nvidia_vgpu_vfio *nvdev,
+			   struct pci_dev *pdev)
+{
+	struct nvidia_vgpu_mgr *vgpu_mgr;
+
+	vgpu_mgr = nvidia_vgpu_mgr_get(pdev);
+	if (IS_ERR(vgpu_mgr))
+		return PTR_ERR(vgpu_mgr);
+
+	nvdev->vgpu_mgr = vgpu_mgr;
+	nvdev->vgpu_types = nvdev->vgpu_mgr->vgpu_types;
+	nvdev->num_vgpu_types = nvdev->vgpu_mgr->num_vgpu_types;
+
+	return 0;
+}
+
+static NVA081_CTRL_VGPU_INFO *
+find_vgpu_type(struct nvidia_vgpu_vfio *nvdev, u32 type_id)
+{
+	NVA081_CTRL_VGPU_INFO *vgpu_type;
+	u32 i;
+
+	for (i = 0; i < nvdev->num_vgpu_types; i++) {
+		vgpu_type = (NVA081_CTRL_VGPU_INFO *)nvdev->vgpu_types[i];
+		if (vgpu_type->vgpuType == type_id)
+			return vgpu_type;
+	}
+
+	return NULL;
+}
+
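+/*
+ * Open path: create the vGPU instance through the vGPU manager, enable
+ * the VF, inherit the PF's DMA mask, reset the function, then enable bus
+ * mastering via the BME RPC.
+ */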
+static int
+nvidia_vgpu_vfio_open_device(struct vfio_device *vdev)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 pf_dma_mask;
+	int ret;
+
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+
+	if (!pdev->physfn)
+		return -EINVAL;
+
+	ret = create_vgpu(nvdev);
+	if (ret)
+		return ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		goto err_enable_device;
+
+	pci_set_master(pdev);
+
+	pf_dma_mask = dma_get_mask(&pdev->physfn->dev);
+	dma_set_mask(&pdev->dev, pf_dma_mask);
+	dma_set_coherent_mask(&pdev->dev, pf_dma_mask);
+
+	ret = pci_try_reset_function(pdev);
+	if (ret)
+		goto err_reset_function;
+
+	ret = nvidia_vgpu_mgr_enable_bme(nvdev->vgpu);
+	if (ret)
+		goto err_enable_bme;
+
+	return 0;
+
+err_enable_bme:
+err_reset_function:
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+err_enable_device:
+	destroy_vgpu(nvdev);
+	return ret;
+}
+
+static void
+nvidia_vgpu_vfio_close_device(struct vfio_device *vdev)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	struct pci_dev *pdev = core_dev->pdev;
+
+	WARN_ON(destroy_vgpu(nvdev));
+
+	if (nvdev->bar0_map) {
+		iounmap(nvdev->bar0_map);
+		pci_release_selected_regions(pdev, 1 << 0);
+		nvdev->bar0_map = NULL;
+	}
+
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
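+/*
+ * VFIO_DEVICE_GET_REGION_INFO: the config region reports the 4 KiB
+ * virtual config space; BAR1 reports the vGPU type's bar1Length (in MiB)
+ * rather than the physical VF resource size.
+ */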
+static int
+get_region_info(struct vfio_pci_core_device *core_dev, unsigned long arg)
+{
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	struct pci_dev *pdev = core_dev->pdev;
+	struct vfio_region_info info;
+	unsigned long minsz;
+	int ret = 0;
+
+	minsz = offsetofend(struct vfio_region_info, offset);
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	switch (info.index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+		info.size = PCI_CONFIG_SPACE_LENGTH;
+		info.flags = VFIO_REGION_INFO_FLAG_READ |
+			VFIO_REGION_INFO_FLAG_WRITE;
+		break;
+
+	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR4_REGION_INDEX: {
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+
+		info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+		info.size = pci_resource_len(pdev, info.index);
+
+		if (info.index == VFIO_PCI_BAR1_REGION_INDEX)
+			info.size = nvdev->curr_vgpu_type->bar1Length * 1024 * 1024;
+
+		if (!info.size) {
+			info.flags = 0;
+			break;
+		}
+		info.flags = VFIO_REGION_INFO_FLAG_READ |
+			VFIO_REGION_INFO_FLAG_WRITE |
+			VFIO_REGION_INFO_FLAG_MMAP;
+
+		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+							sizeof(info), caps.buf,
+							caps.size)) {
+					kfree(caps.buf);
+					ret = -EFAULT;
+					break;
+				}
+				info.cap_offset = sizeof(info);
+			}
+			kfree(caps.buf);
+		}
+		break;
+	}
+	case VFIO_PCI_BAR5_REGION_INDEX:
+	case VFIO_PCI_ROM_REGION_INDEX:
+	case VFIO_PCI_VGA_REGION_INDEX:
+		info.size = 0;
+		break;
+
+	default:
+		if (info.index >= VFIO_PCI_NUM_REGIONS)
+			ret = -EINVAL;
+		break;
+	}
+
+	if (!ret)
+		ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+
+	return ret;
+}
+
+static long nvidia_vgpu_vfio_ioctl(struct vfio_device *vdev,
+				   unsigned int cmd,
+				   unsigned long arg)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	int ret = 0;
+
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_REGION_INFO:
+		ret = get_region_info(core_dev, arg);
+		break;
+	case VFIO_DEVICE_GET_PCI_HOT_RESET_INFO:
+	case VFIO_DEVICE_PCI_HOT_RESET:
+	case VFIO_DEVICE_RESET:
+		break;
+
+	default:
+		ret = vfio_pci_core_ioctl(vdev, cmd, arg);
+		break;
+	}
+
+	return ret;
+}
+
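+/*
+ * Split userspace accesses into naturally aligned 1/2/4-byte chunks and
+ * route them through nvidia_vgpu_vfio_access().
+ */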
+static ssize_t nvidia_vgpu_vfio_read(struct vfio_device *vdev,
+				     char __user *buf, size_t count,
+				     loff_t *ppos)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	u64 val;
+	size_t done = 0;
+	int ret = 0, size;
+
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+
+	while (count) {
+		if (count >= 4 && !(*ppos % 4))
+			size = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			size = 2;
+		else
+			size = 1;
+
+		ret = nvidia_vgpu_vfio_access(nvdev, (char *)&val, size, *ppos, false);
+
+		if (ret <= 0)
+			return ret;
+
+		if (copy_to_user(buf, &val, size) != 0)
+			return -EFAULT;
+
+		*ppos += size;
+		buf += size;
+		count -= size;
+		done += size;
+	}
+
+	return done;
+}
+
+static ssize_t nvidia_vgpu_vfio_write(struct vfio_device *vdev,
+				      const char __user *buf, size_t count,
+				      loff_t *ppos)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	u64 val;
+	size_t done = 0;
+	int ret = 0, size;
+
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+
+	while (count) {
+		if (count >= 4 && !(*ppos % 4))
+			size = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			size = 2;
+		else
+			size = 1;
+
+		if (copy_from_user(&val, buf, size) != 0)
+			return -EFAULT;
+
+		ret = nvidia_vgpu_vfio_access(nvdev, (char *)&val, size, *ppos, true);
+
+		if (ret <= 0)
+			return ret;
+
+		*ppos += size;
+		buf += size;
+		count -= size;
+		done += size;
+	}
+
+	return done;
+}
+
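+/*
+ * Map a BAR into userspace. The region index is carried in the upper
+ * bits of the mmap offset, following the vfio-pci offset convention.
+ */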
+static int nvidia_vgpu_vfio_mmap(struct vfio_device *vdev,
+				 struct vm_area_struct *vma)
+{
+	struct vfio_pci_core_device *core_dev = vdev_to_core_dev(vdev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 phys_len, req_len, pgoff, req_start;
+	unsigned int index;
+
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+	if (index >= VFIO_PCI_BAR5_REGION_INDEX)
+		return -EINVAL;
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
+	req_len = vma->vm_end - vma->vm_start;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (req_len == 0)
+		return -EINVAL;
+
+	if ((req_start + req_len > phys_len) || (phys_len == 0))
+		return -EINVAL;
+
+	vma->vm_private_data = vdev;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
+	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, req_len, vma->vm_page_prot);
+}
+
+static const struct vfio_device_ops nvidia_vgpu_vfio_ops = {
+	.name           = "nvidia-vgpu-vfio-pci",
+	.init		= vfio_pci_core_init_dev,
+	.release	= vfio_pci_core_release_dev,
+	.open_device    = nvidia_vgpu_vfio_open_device,
+	.close_device   = nvidia_vgpu_vfio_close_device,
+	.ioctl          = nvidia_vgpu_vfio_ioctl,
+	.device_feature = vfio_pci_core_ioctl_feature,
+	.read           = nvidia_vgpu_vfio_read,
+	.write          = nvidia_vgpu_vfio_write,
+	.mmap           = nvidia_vgpu_vfio_mmap,
+	.request	= vfio_pci_core_request,
+	.match		= vfio_pci_core_match,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
+	.detach_ioas	= vfio_iommufd_physical_detach_ioas,
+};
+
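+/*
+ * NOTE: the vGPU type is hard-coded for now; presumably a type-selection
+ * interface will replace this.
+ */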
+static int setup_vgpu_type(struct nvidia_vgpu_vfio *nvdev)
+{
+	nvdev->curr_vgpu_type = find_vgpu_type(nvdev, 869);
+	if (!nvdev->curr_vgpu_type)
+		return -ENODEV;
+	return 0;
+}
+
+static int nvidia_vgpu_vfio_probe(struct pci_dev *pdev,
+				  const struct pci_device_id *id_table)
+{
+	struct nvidia_vgpu_vfio *nvdev;
+	int ret;
+
+	if (!pdev->is_virtfn)
+		return -EINVAL;
+
+	nvdev = vfio_alloc_device(nvidia_vgpu_vfio, core_dev.vdev,
+				  &pdev->dev, &nvidia_vgpu_vfio_ops);
+	if (IS_ERR(nvdev))
+		return PTR_ERR(nvdev);
+
+	ret = attach_vgpu_mgr(nvdev, pdev);
+	if (ret)
+		goto err_attach_vgpu_mgr;
+
+	ret = setup_vgpu_type(nvdev);
+	if (ret)
+		goto err_setup_vgpu_type;
+
+	nvidia_vgpu_vfio_setup_config(nvdev);
+
+	dev_set_drvdata(&pdev->dev, &nvdev->core_dev);
+
+	ret = vfio_pci_core_register_device(&nvdev->core_dev);
+	if (ret)
+		goto err_setup_vgpu_type;
+
+	return 0;
+
+err_setup_vgpu_type:
+	detach_vgpu_mgr(nvdev);
+
+err_attach_vgpu_mgr:
+	vfio_put_device(&nvdev->core_dev.vdev);
+
+	pci_err(pdev, "VF probe failed with ret: %d\n", ret);
+	return ret;
+}
+
+static void nvidia_vgpu_vfio_remove(struct pci_dev *pdev)
+{
+	struct vfio_pci_core_device *core_dev = dev_get_drvdata(&pdev->dev);
+	struct nvidia_vgpu_vfio *nvdev = core_dev_to_nvdev(core_dev);
+
+	vfio_pci_core_unregister_device(core_dev);
+	detach_vgpu_mgr(nvdev);
+	vfio_put_device(&core_dev->vdev);
+}
+
+static const struct pci_device_id nvidia_vgpu_vfio_table[] = {
+	{
+		.vendor      = PCI_VENDOR_ID_NVIDIA,
+		.device      = PCI_ANY_ID,
+		.subvendor   = PCI_ANY_ID,
+		.subdevice   = PCI_ANY_ID,
+		.class       = (PCI_CLASS_DISPLAY_3D << 8),
+		.class_mask  = ~0,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, nvidia_vgpu_vfio_table);
+
+static struct pci_driver nvidia_vgpu_vfio_driver = {
+	.name               = "nvidia-vgpu-vfio",
+	.id_table           = nvidia_vgpu_vfio_table,
+	.probe              = nvidia_vgpu_vfio_probe,
+	.remove             = nvidia_vgpu_vfio_remove,
+	.driver_managed_dma = true,
+};
+
+module_pci_driver(nvidia_vgpu_vfio_driver);
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Vinay Kabra <vkabra@nvidia.com>");
+MODULE_AUTHOR("Kirti Wankhede <kwankhede@nvidia.com>");
+MODULE_AUTHOR("Zhi Wang <zhiw@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA vGPU VFIO Variant Driver - User Level driver for NVIDIA vGPU");
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu.c b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
index 93d27db30a41..003ca116b4a8 100644
--- a/drivers/vfio/pci/nvidia-vgpu/vgpu.c
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu.c
@@ -328,3 +328,25 @@  int nvidia_vgpu_mgr_create_vgpu(struct nvidia_vgpu *vgpu, u8 *vgpu_type)
 	return ret;
 }
 EXPORT_SYMBOL(nvidia_vgpu_mgr_create_vgpu);
+
+static int update_bme_state(struct nvidia_vgpu *vgpu)
+{
+	NV_VGPU_CPU_RPC_DATA_UPDATE_BME_STATE params = {0};
+
+	params.enable = true;
+
+	return nvidia_vgpu_rpc_call(vgpu, NV_VGPU_CPU_RPC_MSG_UPDATE_BME_STATE,
+				    &params, sizeof(params));
+}
+
+/**
+ * nvidia_vgpu_mgr_enable_bme - handle the BME (bus master enable) sequence
+ * @vgpu: the vGPU instance
+ *
+ * Returns: 0 on success, a negative error code on failure.
+ */
+int nvidia_vgpu_mgr_enable_bme(struct nvidia_vgpu *vgpu)
+{
+	return update_bme_state(vgpu);
+}
+EXPORT_SYMBOL(nvidia_vgpu_mgr_enable_bme);
diff --git a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
index af922d8e539c..2c9e0eebcb99 100644
--- a/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
+++ b/drivers/vfio/pci/nvidia-vgpu/vgpu_mgr.h
@@ -84,6 +84,6 @@  int nvidia_vgpu_rpc_call(struct nvidia_vgpu *vgpu, u32 msg_type,
 void nvidia_vgpu_clean_rpc(struct nvidia_vgpu *vgpu);
 int nvidia_vgpu_setup_rpc(struct nvidia_vgpu *vgpu);
 
-int nvidia_vgpu_mgr_reset_vgpu(struct nvidia_vgpu *vgpu);
+int nvidia_vgpu_mgr_enable_bme(struct nvidia_vgpu *vgpu);
 
 #endif