@@ -17,9 +17,6 @@
#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
-/* Memory size expected as non cached and reserved by the VM driver */
-#define RESMEM_SIZE SZ_1G
-
/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
#define MEMBLK_SIZE SZ_512M
@@ -72,7 +69,7 @@ nvgrace_gpu_memregion(int index,
if (index == USEMEM_REGION_INDEX)
return &nvdev->usemem;
- if (index == RESMEM_REGION_INDEX)
+ if (nvdev->resmem.memlength && index == RESMEM_REGION_INDEX)
return &nvdev->resmem;
return NULL;
@@ -757,21 +754,31 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
u64 memphys, u64 memlength)
{
int ret = 0;
+ u64 resmem_size = 0;
/*
- * The VM GPU device driver needs a non-cacheable region to support
- * the MIG feature. Since the device memory is mapped as NORMAL cached,
- * carve out a region from the end with a different NORMAL_NC
- * property (called as reserved memory and represented as resmem). This
- * region then is exposed as a 64b BAR (region 2 and 3) to the VM, while
- * exposing the rest (termed as usable memory and represented using usemem)
- * as cacheable 64b BAR (region 4 and 5).
+ * On Grace Hopper systems, the VM GPU device driver needs a non-cacheable
+ * region to support the MIG feature owing to a hardware bug. Since the
+ * device memory is mapped as NORMAL cached, carve out a region from the end
+ * with a different NORMAL_NC property (called as reserved memory and
+ * represented as resmem). This region then is exposed as a 64b BAR
+ * (region 2 and 3) to the VM, while exposing the rest (termed as usable
+ * memory and represented using usemem) as cacheable 64b BAR (region 4 and 5).
*
* devmem (memlength)
* |-------------------------------------------------|
* | |
* usemem.memphys resmem.memphys
+ *
+ * This hardware bug is fixed on the Grace Blackwell platforms and the
+ * presence of fix can be determined through nvdev->has_mig_hw_bug_fix.
+ * Thus on systems with the hardware fix, there is no need to partition
+ * the GPU device memory and the entire memory is usable and mapped as
+ * NORMAL cached.
*/
+ if (!nvdev->has_mig_hw_bug_fix)
+ resmem_size = SZ_1G;
+
nvdev->usemem.memphys = memphys;
/*
@@ -780,23 +787,31 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
* memory (usemem) is added to the kernel for usage by the VM
* workloads. Make the usable memory size memblock aligned.
*/
- if (check_sub_overflow(memlength, RESMEM_SIZE,
+ if (check_sub_overflow(memlength, resmem_size,
&nvdev->usemem.memlength)) {
ret = -EOVERFLOW;
goto done;
}
- /*
- * The USEMEM part of the device memory has to be MEMBLK_SIZE
- * aligned. This is a hardwired ABI value between the GPU FW and
- * VFIO driver. The VM device driver is also aware of it and make
- * use of the value for its calculation to determine USEMEM size.
- */
- nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
- MEMBLK_SIZE);
- if (nvdev->usemem.memlength == 0) {
- ret = -EINVAL;
- goto done;
+ if (!nvdev->has_mig_hw_bug_fix) {
+ /*
+ * If the device memory is split to workaround the MIG bug,
+ * the USEMEM part of the device memory has to be MEMBLK_SIZE
+ * aligned. This is a hardwired ABI value between the GPU FW and
+ * VFIO driver. The VM device driver is also aware of it and make
+ * use of the value for its calculation to determine USEMEM size.
+ * Note that the device memory may not be 512M aligned.
+ *
+ * If the hardware has the fix for MIG, there is no requirement
+ * for splitting the device memory to create RESMEM. The entire
+ * device memory is usable and will be USEMEM.
+ */
+ nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
+ MEMBLK_SIZE);
+ if (nvdev->usemem.memlength == 0) {
+ ret = -EINVAL;
+ goto done;
+ }
}
if ((check_add_overflow(nvdev->usemem.memphys,
@@ -813,7 +828,10 @@ nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
* the BAR size for them.
*/
nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
- nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);
+
+ if (nvdev->resmem.memlength)
+ nvdev->resmem.bar_size =
+ roundup_pow_of_two(nvdev->resmem.memlength);
done:
return ret;
}