@@ -17,9 +17,6 @@
#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
-/* Memory size expected as non cached and reserved by the VM driver */
-#define RESMEM_SIZE SZ_1G
-
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
@@ -72,7 +69,7 @@ nvgrace_gpu_memregion(int index,
if (index == USEMEM_REGION_INDEX)
return &nvdev->usemem;
- if (index == RESMEM_REGION_INDEX)
+ if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)
return &nvdev->resmem;
return NULL;
@@ -757,40 +754,67 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
u64 memphys, u64 memlength)
{
int ret = 0;
+ u64 resmem_size = 0;
/*
- * The VM GPU device driver needs a non-cacheable region to support
- * the MIG feature. Since the device memory is mapped as NORMAL cached,
- * carve out a region from the end with a different NORMAL_NC
- * property (called as reserved memory and represented as resmem). This
- * region then is exposed as a 64b BAR (region 2 and 3) to the VM, while
- * exposing the rest (termed as usable memory and represented using usemem)
- * as cacheable 64b BAR (region 4 and 5).
+ * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable
+ * region to support the MIG feature owing to a hardware bug. Since the
+ * device memory is mapped as NORMAL cached, carve out a region from the end
+ * with a different NORMAL_NC property (called as reserved memory and
+ * represented as resmem). This region then is exposed as a 64b BAR
+ * (region 2 and 3) to the VM, while exposing the rest (termed as usable
+ * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5).
*
* devmem (memlength)
* |-------------------------------------------------|
* | |
* usemem.memphys resmem.memphys
+ *
+ * This hardware bug is fixed on the Grace Blackwell platforms and the
+ * presence of the bug can be determined through nvdev->has_mig_hw_bug.
+ * Thus on systems with the hardware fix, there is no need to partition
+ * the GPU device memory and the entire memory is usable and mapped as
+ * NORMAL cached (i.e. resmem size is 0).
*/
+ if (nvdev->has_mig_hw_bug)
+ resmem_size = SZ_1G;
+
nvdev->usemem.memphys = memphys;
/*
* The device memory exposed to the VM is added to the kernel by the
- * VM driver module in chunks of memory block size. Only the usable
- * memory (usemem) is added to the kernel for usage by the VM
- * workloads. Make the usable memory size memblock aligned.
+ * VM driver module in chunks of memory block size. Note that only the
+ * usable memory (usemem) is added to the kernel for usage by the VM
+ * workloads.
*/
- if (check_sub_overflow(memlength, RESMEM_SIZE,
+ if (check_sub_overflow(memlength, resmem_size,
&nvdev->usemem.memlength)) {
ret = -EOVERFLOW;
goto done;
}
/*
- * The USEMEM part of the device memory has to be MEMBLK_SIZE
- * aligned. This is a hardwired ABI value between the GPU FW and
- * VFIO driver. The VM device driver is also aware of it and make
- * use of the value for its calculation to determine USEMEM size.
+ * The usemem region is exposed as a 64B Bar composed of region 4 and 5.
+ * Calculate and save the BAR size for the region.
+ */
+ nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
+
+ /*
+ * If the hardware has the fix for MIG, there is no requirement
+ * for splitting the device memory to create RESMEM. The entire
+ * device memory is usable and will be USEMEM. Return here for
+ * such case.
+ */
+ if (!nvdev->has_mig_hw_bug)
+ goto done;
+
+ /*
+ * When the device memory is split to workaround the MIG bug on
+ * Grace Hopper, the USEMEM part of the device memory has to be
+ * MEMBLK_SIZE aligned. This is a hardwired ABI value between the
+ * GPU FW and VFIO driver. The VM device driver is also aware of it
+ * and make use of the value for its calculation to determine USEMEM
+ * size. Note that the device memory may not be 512M aligned.
*/
nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
MEMBLK_SIZE);
@@ -809,10 +833,9 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
}
/*
- * The memory regions are exposed as BARs. Calculate and save
- * the BAR size for them.
+ * The resmem region is exposed as a 64b BAR composed of region 2 and 3
+ * for Grace Hopper. Calculate and save the BAR size for the region.
*/
- nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);
done:
return ret;