@@ -23,6 +23,11 @@
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
+#define DVSEC_BITMAP_OFFSET 0xA
+#define MIG_SUPPORTED_WITH_CACHED_RESMEM BIT(0)
+
+#define GPU_CAP_DVSEC_REGISTER 3
+
/*
* The state of the two device memory region - resmem and usemem - is
* saved as struct mem_region.
@@ -46,6 +51,7 @@ struct nvgrace_gpu_pci_core_device {
struct mem_region resmem;
/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
+ bool has_mig_hw_bug_fix;
};
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
@@ -812,6 +818,26 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
return ret;
}
+static bool nvgrace_gpu_has_mig_hw_bug_fix(struct pci_dev *pdev)
+{
+ int pcie_dvsec;
+ u16 dvsec_ctrl16;
+
+ pcie_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_NVIDIA,
+ GPU_CAP_DVSEC_REGISTER);
+
+ if (pcie_dvsec) {
+ pci_read_config_word(pdev,
+ pcie_dvsec + DVSEC_BITMAP_OFFSET,
+ &dvsec_ctrl16);
+
+ if (dvsec_ctrl16 & MIG_SUPPORTED_WITH_CACHED_RESMEM)
+ return true;
+ }
+
+ return false;
+}
+
static int nvgrace_gpu_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
@@ -832,6 +858,8 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
dev_set_drvdata(&pdev->dev, &nvdev->core_device);
if (ops == &nvgrace_gpu_pci_ops) {
+ nvdev->has_mig_hw_bug_fix = nvgrace_gpu_has_mig_hw_bug_fix(pdev);
+
/*
* Device memory properties are identified in the host ACPI
* table. Set the nvgrace_gpu_pci_core_device structure.
@@ -868,6 +896,8 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = {
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) },
/* GH200 SKU */
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2348) },
+ /* GB200 SKU */
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2941) },
{}
};