@@ -8,7 +8,9 @@
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"
+#include <linux/bitfield.h>
#include <linux/devfreq.h>
+#include <linux/soc/qcom/llcc-qcom.h>
#define GPU_PAS_ID 13
@@ -1022,6 +1024,79 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
return IRQ_HANDLED;
}
+static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
+{
+ return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
+{
+ return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
+}
+
+static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
+{
+ llcc_slice_deactivate(a6xx_gpu->llc_slice);
+ llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+{
+ u32 cntl1_regval = 0;
+
+ if (IS_ERR(a6xx_gpu->llc_mmio))
+ return;
+
+ if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
+ u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+
+ gpu_scid &= 0x1f;
+ cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
+ (gpu_scid << 15) | (gpu_scid << 20);
+ }
+
+ if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
+ u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
+
+ gpuhtw_scid &= 0x1f;
+ cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+ }
+
+ if (cntl1_regval) {
+ /*
+ * Program the slice IDs for the various GPU blocks and GPU MMU
+ * pagetables
+ */
+ a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
+
+ /*
+ * Program cacheability overrides to not allocate cache lines on
+ * a write miss
+ */
+ a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
+ }
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
+{
+ llcc_slice_putd(a6xx_gpu->llc_slice);
+ llcc_slice_putd(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_slices_init(struct platform_device *pdev,
+ struct a6xx_gpu *a6xx_gpu)
+{
+ a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+ if (IS_ERR(a6xx_gpu->llc_mmio))
+ return;
+
+ a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
+ a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+
+ if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+ a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
+}
+
static int a6xx_pm_resume(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -1038,6 +1113,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
msm_gpu_resume_devfreq(gpu);
+ a6xx_llc_activate(a6xx_gpu);
+
return 0;
}
@@ -1048,6 +1125,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
trace_msm_gpu_suspend(0);
+ a6xx_llc_deactivate(a6xx_gpu);
+
devfreq_suspend_device(gpu->devfreq.devfreq);
return a6xx_gmu_stop(a6xx_gpu);
@@ -1091,6 +1170,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
drm_gem_object_put(a6xx_gpu->shadow_bo);
}
+ a6xx_llc_slices_destroy(a6xx_gpu);
+
a6xx_gmu_remove(a6xx_gpu);
adreno_gpu_cleanup(adreno_gpu);
@@ -1209,6 +1290,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
if (info && info->revn == 650)
adreno_gpu->base.hw_apriv = true;
+ a6xx_llc_slices_init(pdev, a6xx_gpu);
+
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
@@ -28,6 +28,10 @@ struct a6xx_gpu {
uint32_t *shadow;
bool has_whereami;
+
+ void __iomem *llc_mmio;
+ void *llc_slice;
+ void *htw_llc_slice;
};
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
@@ -16,6 +16,7 @@
#include <linux/soc/qcom/mdt_loader.h>
#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
+#include "a6xx_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
@@ -189,6 +190,9 @@ struct msm_gem_address_space *
adreno_iommu_create_address_space(struct msm_gpu *gpu,
struct platform_device *pdev)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct domain_attr_io_pgtbl_cfg pgtbl_cfg;
struct iommu_domain *iommu;
struct msm_mmu *mmu;
struct msm_gem_address_space *aspace;
@@ -198,7 +202,20 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
if (!iommu)
return NULL;
+ /*
+ * This allows GPU to set the bus attributes required to use system
+ * cache on behalf of the iommu page table walker.
+ */
+ if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
+ pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
+ iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
+ }
+
mmu = msm_iommu_new(&pdev->dev, iommu);
+ if (IS_ERR(mmu)) {
+ iommu_domain_free(iommu);
+ return ERR_CAST(mmu);
+ }
/*
* Use the aperture start or SZ_16M, whichever is greater. This will