@@ -24,6 +24,7 @@
#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/stddef.h>
@@ -635,6 +636,7 @@ bool
pvr_device_overrides_validate(struct pvr_device *pvr_dev,
const struct pvr_device_overrides *overrides)
{
+ struct drm_device *drm_dev = from_pvr_device(pvr_dev);
bool ret = true;
/*
@@ -643,7 +645,14 @@ pvr_device_overrides_validate(struct pvr_device *pvr_dev,
*
* Note that this function may be called early during device initialization
* so it should not be assumed that @pvr_dev is ready for normal use yet.
- */
+ */
+
+ if (overrides->device_memory_force_cpu_cached &&
+ device_get_dma_attr(drm_dev->dev) != DEV_DMA_COHERENT) {
+ drm_err(drm_dev,
+			"Specifying device_memory_force_cpu_cached override without dma-coherent attribute is unsupported.\n");
+ ret = false;
+ }
return ret;
}
@@ -60,8 +60,19 @@ struct pvr_fw_version {
/**
* struct pvr_device_overrides - Hardware-level overrides loaded from
* MODULE_DEVICE_TABLE() or similar.
+ *
+ * @device_memory_force_cpu_cached: By default, all device memory buffer objects
+ * are mapped write-combined on the CPU (see %PVR_BO_CPU_CACHED) including MMU
+ * page table backing pages which do not use the regular device memory objects.
+ * This override forces all CPU mappings to be mapped cached instead. Since this
+ * could require additional cache maintenance operations to be performed,
+ * pvr_device_overrides_validate() ensures that the dma-coherent attribute is
+ * set when this override is specified. Required on some TI platforms where a
+ * bug causes device-to-CPU cache snooping to behave incorrectly when
+ * interacting with CPU-uncached memory.
*/
struct pvr_device_overrides {
+ bool device_memory_force_cpu_cached;
};
/**
@@ -1490,7 +1490,7 @@ static void pvr_remove(struct platform_device *plat_dev)
pvr_power_domains_fini(pvr_dev);
}
-static const struct pvr_device_overrides pvr_device_overrides_default = {};
+static const struct pvr_device_overrides pvr_device_overrides_default = { 0 };
/*
* Always specify &pvr_device_overrides_default instead of %NULL for &struct of_device_id->data so
@@ -345,6 +345,9 @@ pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
if (size == 0 || !pvr_gem_object_flags_validate(flags))
return ERR_PTR(-EINVAL);
+ if (PVR_HAS_OVERRIDE(pvr_dev, device_memory_force_cpu_cached))
+ flags |= PVR_BO_CPU_CACHED;
+
shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
if (IS_ERR(shmem_obj))
return ERR_CAST(shmem_obj);
@@ -44,8 +44,11 @@ struct pvr_file;
* Bits not defined anywhere are "undefined".
*
* CPU mapping options
- * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this
- * flag to override this behaviour and map the object cached.
+ * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set
+ * this flag to override this behaviour and map the object cached. If
+ * &struct pvr_device_overrides->device_memory_force_cpu_cached is specified, all allocations
+ * will be mapped as if this flag were set. This does not require any additional consideration
+ * at allocation time since the override is only valid if the dma-coherent attribute is set.
*
* Firmware options
* :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
@@ -259,6 +259,7 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
struct device *dev = from_pvr_device(pvr_dev)->dev;
struct page *raw_page;
+ pgprot_t prot;
int err;
dma_addr_t dma_addr;
@@ -268,7 +269,11 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
if (!raw_page)
return -ENOMEM;
- host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+ prot = PAGE_KERNEL;
+ if (!PVR_HAS_OVERRIDE(pvr_dev, device_memory_force_cpu_cached))
+ prot = pgprot_writecombine(prot);
+
+ host_ptr = vmap(&raw_page, 1, VM_MAP, prot);
if (!host_ptr) {
err = -ENOMEM;
goto err_free_page;
The TI k3-j721s2 platform has a bug relating to cache snooping on the AXI ACE-Lite interface. Disabling cache snooping altogether would also resolve the issue, but is considered more of a performance hit. Given the platform is dma-coherent, forcing all device-accessible memory allocations through the CPU cache is the preferred solution. Implement this workaround so that it can later be enabled for the TI k3-j721s2 platform. Signed-off-by: Matt Coster <matt.coster@imgtec.com> --- Changes in v2: - None - Link to v1: https://lore.kernel.org/r/20241105-sets-bxs-4-64-patch-v1-v1-19-4ed30e865892@imgtec.com --- drivers/gpu/drm/imagination/pvr_device.c | 11 ++++++++++- drivers/gpu/drm/imagination/pvr_device.h | 11 +++++++++++ drivers/gpu/drm/imagination/pvr_drv.c | 2 +- drivers/gpu/drm/imagination/pvr_gem.c | 3 +++ drivers/gpu/drm/imagination/pvr_gem.h | 7 +++++-- drivers/gpu/drm/imagination/pvr_mmu.c | 7 ++++++- 6 files changed, 36 insertions(+), 5 deletions(-)