diff mbox series

drm/malidp: Enable MMU prefetch on Mali-DP650

Message ID 20180820125029.5366-1-alexandru-cosmin.gheorghe@arm.com (mailing list archive)
State New, archived
Headers show
Series drm/malidp: Enable MMU prefetch on Mali-DP650 | expand

Commit Message

Alexandru-Cosmin Gheorghe Aug. 20, 2018, 12:50 p.m. UTC
From: Jamie Fox <jamie.fox@arm.com>

Mali-DP650 supports warming up the SMMU translations, by sending
requests to the SMMU before a buffer is read.

There are two modes supported:

- PARTIAL: could be enabled when the buffer is composed of 4K or 64K
  pages, the display hardware will send a configurable number of
  requests before the actual reading.

- FULL: could be enabled when the buffer is composed of 1M or 2M
  pages, the display hardware will send requests before reading for
  all pages composing the buffer.

This patch adds a mechanism for detecting the page size and setting the
MMU prefetch mode if possible.

Signed-off-by: Jamie Fox <jamie.fox@arm.com>
Signed-off-by: Alexandru Gheorghe <alexandru-cosmin.gheorghe@arm.com>
---
 drivers/gpu/drm/arm/malidp_drv.h    |   9 ++
 drivers/gpu/drm/arm/malidp_hw.c     |  17 +-
 drivers/gpu/drm/arm/malidp_hw.h     |   1 +
 drivers/gpu/drm/arm/malidp_planes.c | 233 ++++++++++++++++++++++++++++
 drivers/gpu/drm/arm/malidp_regs.h   |  11 ++
 5 files changed, 269 insertions(+), 2 deletions(-)

Comments

Liviu Dudau Sept. 21, 2018, 12:23 p.m. UTC | #1
On Mon, Aug 20, 2018 at 01:50:29PM +0100, Alexandru Gheorghe wrote:
> From: Jamie Fox <jamie.fox@arm.com>
> 
> Mali-DP650 supports warming up the SMMU translations, by sending
> requests to the SMMU before a buffer is read.
> 
> There are two modes supported:
> 
> - PARTIAL: could be enabled when the buffer is composed of 4K or 64K
>   pages, the display hardware will send a configurable number of
>   requests before the actual reading.
> 
> - FULL: could be enabled when the buffer is composed of 1M or 2M
>   pages, the display hardware will send requests before reading for
>   all pages composing the buffer.
> 
> This patch adds a mechanism for detecting the page size and setting the
> MMU prefetch mode if possible.
> 
> Signed-off-by: Jamie Fox <jamie.fox@arm.com>
> Signed-off-by: Alexandru Gheorghe <alexandru-cosmin.gheorghe@arm.com>

Acked-by: Liviu Dudau <liviu.dudau@arm.com>

> ---
>  drivers/gpu/drm/arm/malidp_drv.h    |   9 ++
>  drivers/gpu/drm/arm/malidp_hw.c     |  17 +-
>  drivers/gpu/drm/arm/malidp_hw.h     |   1 +
>  drivers/gpu/drm/arm/malidp_planes.c | 233 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/arm/malidp_regs.h   |  11 ++
>  5 files changed, 269 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
> index e3eb0cb1f385..2d824558216f 100644
> --- a/drivers/gpu/drm/arm/malidp_drv.h
> +++ b/drivers/gpu/drm/arm/malidp_drv.h
> @@ -53,6 +53,13 @@ struct malidp_plane {
>  	struct drm_plane base;
>  	struct malidp_hw_device *hwdev;
>  	const struct malidp_layer *layer;
> +
> +};
> +
> +enum mmu_prefetch_mode {
> +	MALIDP_PREFETCH_MODE_NONE,
> +	MALIDP_PREFETCH_MODE_PARTIAL,
> +	MALIDP_PREFETCH_MODE_FULL,
>  };
>  
>  struct malidp_plane_state {
> @@ -63,6 +70,8 @@ struct malidp_plane_state {
>  	/* internal format ID */
>  	u8 format;
>  	u8 n_planes;
> +	enum mmu_prefetch_mode mmu_prefetch_mode;
> +	u32 mmu_prefetch_pgsize;
>  };
>  
>  #define to_malidp_plane(x) container_of(x, struct malidp_plane, base)
> diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
> index c94a4422e0e9..4233be1c5709 100644
> --- a/drivers/gpu/drm/arm/malidp_hw.c
> +++ b/drivers/gpu/drm/arm/malidp_hw.c
> @@ -96,6 +96,19 @@ static const struct malidp_layer malidp550_layers[] = {
>  	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE, 0 },
>  };
>  
> +static const struct malidp_layer malidp650_layers[] = {
> +	{ DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE,
> +		MALIDP_DE_LV_STRIDE0, MALIDP550_LV_YUV2RGB,
> +		MALIDP650_DE_LV_MMU_CTRL },
> +	{ DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE,
> +		MALIDP_DE_LG_STRIDE, 0, MALIDP650_DE_LG_MMU_CTRL },
> +	{ DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE,
> +		MALIDP_DE_LV_STRIDE0, MALIDP550_LV_YUV2RGB,
> +		MALIDP650_DE_LV_MMU_CTRL },
> +	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE,
> +		MALIDP550_DE_LS_R1_STRIDE, 0, MALIDP650_DE_LS_MMU_CTRL },
> +};
> +
>  #define SE_N_SCALING_COEFFS	96
>  static const u16 dp500_se_scaling_coeffs[][SE_N_SCALING_COEFFS] = {
>  	[MALIDP_UPSCALING_COEFFS - 1] = {
> @@ -832,8 +845,8 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = {
>  			.dc_base = MALIDP550_DC_BASE,
>  			.out_depth_base = MALIDP550_DE_OUTPUT_DEPTH,
>  			.features = MALIDP_REGMAP_HAS_CLEARIRQ,
> -			.n_layers = ARRAY_SIZE(malidp550_layers),
> -			.layers = malidp550_layers,
> +			.n_layers = ARRAY_SIZE(malidp650_layers),
> +			.layers = malidp650_layers,
>  			.de_irq_map = {
>  				.irq_mask = MALIDP_DE_IRQ_UNDERRUN |
>  					    MALIDP650_DE_IRQ_DRIFT |
> diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h
> index ad2e96915d44..39b92a9fb810 100644
> --- a/drivers/gpu/drm/arm/malidp_hw.h
> +++ b/drivers/gpu/drm/arm/malidp_hw.h
> @@ -62,6 +62,7 @@ struct malidp_layer {
>  	u16 ptr;		/* address offset for the pointer register */
>  	u16 stride_offset;	/* offset to the first stride register. */
>  	s16 yuv2rgb_offset;	/* offset to the YUV->RGB matrix entries */
> +	u16 mmu_ctrl_offset;    /* offset to the MMU control register */
>  };
>  
>  enum malidp_scaling_coeff_set {
> diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
> index 29409a65d864..99c487be9a1d 100644
> --- a/drivers/gpu/drm/arm/malidp_planes.c
> +++ b/drivers/gpu/drm/arm/malidp_planes.c
> @@ -10,11 +10,14 @@
>   * ARM Mali DP plane manipulation routines.
>   */
>  
> +#include <linux/iommu.h>
> +
>  #include <drm/drmP.h>
>  #include <drm/drm_atomic.h>
>  #include <drm/drm_atomic_helper.h>
>  #include <drm/drm_fb_cma_helper.h>
>  #include <drm/drm_gem_cma_helper.h>
> +#include <drm/drm_gem_framebuffer_helper.h>
>  #include <drm/drm_plane_helper.h>
>  #include <drm/drm_print.h>
>  
> @@ -56,6 +59,13 @@
>   */
>  #define MALIDP_ALPHA_LUT 0xffaa5500
>  
> +/* page sizes the MMU prefetcher can support */
> +#define MALIDP_MMU_PREFETCH_PARTIAL_PGSIZES	(SZ_4K | SZ_64K)
> +#define MALIDP_MMU_PREFETCH_FULL_PGSIZES	(SZ_1M | SZ_2M)
> +
> +/* readahead for partial-frame prefetch */
> +#define MALIDP_MMU_PREFETCH_READAHEAD		8
> +
>  static void malidp_de_plane_destroy(struct drm_plane *plane)
>  {
>  	struct malidp_plane *mp = to_malidp_plane(plane);
> @@ -103,6 +113,9 @@ drm_plane_state *malidp_duplicate_plane_state(struct drm_plane *plane)
>  	state->format = m_state->format;
>  	state->n_planes = m_state->n_planes;
>  
> +	state->mmu_prefetch_mode = m_state->mmu_prefetch_mode;
> +	state->mmu_prefetch_pgsize = m_state->mmu_prefetch_pgsize;
> +
>  	return &state->base;
>  }
>  
> @@ -115,6 +128,12 @@ static void malidp_destroy_plane_state(struct drm_plane *plane,
>  	kfree(m_state);
>  }
>  
> +static const char * const prefetch_mode_names[] = {
> +	[MALIDP_PREFETCH_MODE_NONE] = "MMU_PREFETCH_NONE",
> +	[MALIDP_PREFETCH_MODE_PARTIAL] = "MMU_PREFETCH_PARTIAL",
> +	[MALIDP_PREFETCH_MODE_FULL] = "MMU_PREFETCH_FULL",
> +};
> +
>  static void malidp_plane_atomic_print_state(struct drm_printer *p,
>  					    const struct drm_plane_state *state)
>  {
> @@ -123,6 +142,9 @@ static void malidp_plane_atomic_print_state(struct drm_printer *p,
>  	drm_printf(p, "\trotmem_size=%u\n", ms->rotmem_size);
>  	drm_printf(p, "\tformat_id=%u\n", ms->format);
>  	drm_printf(p, "\tn_planes=%u\n", ms->n_planes);
> +	drm_printf(p, "\tmmu_prefetch_mode=%s\n",
> +		   prefetch_mode_names[ms->mmu_prefetch_mode]);
> +	drm_printf(p, "\tmmu_prefetch_pgsize=%d\n", ms->mmu_prefetch_pgsize);
>  }
>  
>  static const struct drm_plane_funcs malidp_de_plane_funcs = {
> @@ -176,6 +198,9 @@ static int malidp_se_check_scaling(struct malidp_plane *mp,
>  	return 0;
>  }
>  
> +static void malidp_de_prefetch_settings(struct malidp_plane *mp,
> +					struct malidp_plane_state *ms);
> +
>  static int malidp_de_plane_check(struct drm_plane *plane,
>  				 struct drm_plane_state *state)
>  {
> @@ -245,6 +270,7 @@ static int malidp_de_plane_check(struct drm_plane *plane,
>  		ms->rotmem_size = val;
>  	}
>  
> +	malidp_de_prefetch_settings(mp, ms);
>  	return 0;
>  }
>  
> @@ -321,6 +347,210 @@ static void malidp_de_set_color_encoding(struct malidp_plane *plane,
>  	}
>  }
>  
> +static u32 malidp_get_pgsize_bitmap(struct malidp_plane *mp)
> +{
> +	u32 pgsize_bitmap = 0;
> +
> +	if (iommu_present(&platform_bus_type)) {
> +		struct iommu_domain *mmu_dom =
> +			iommu_get_domain_for_dev(mp->base.dev->dev);
> +
> +		if (mmu_dom)
> +			pgsize_bitmap = mmu_dom->pgsize_bitmap;
> +	}
> +
> +	return pgsize_bitmap;
> +}
> +
> +/*
> + * Check if the framebuffer is entirely made up of pages at least pgsize in
> + * size. Only a heuristic: assumes that each scatterlist entry has been aligned
> + * to the largest page size smaller than its length and that the MMU maps to
> + * the largest page size possible.
> + */
> +static bool malidp_check_pages_threshold(struct malidp_plane_state *ms,
> +					 u32 pgsize)
> +{
> +	int i;
> +
> +	for (i = 0; i < ms->n_planes; i++) {
> +		struct drm_gem_object *obj;
> +		struct sg_table *sgt;
> +		struct scatterlist *sgl;
> +
> +		obj = drm_gem_fb_get_obj(ms->base.fb, i);
> +		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
> +
> +		if (!sgt)
> +			return false;
> +
> +		sgl = sgt->sgl;
> +
> +		while (sgl) {
> +			if (sgl->length < pgsize) {
> +				kfree(sgt);
> +				return false;
> +			}
> +
> +			sgl = sg_next(sgl);
> +		}
> +
> +		kfree(sgt);
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * Check if it is possible to enable partial-frame MMU prefetch given the
> + * current format, AFBC state and rotation.
> + */
> +static bool malidp_partial_prefetch_supported(u32 format, u64 modifier,
> +					      unsigned int rotation)
> +{
> +	bool afbc, sparse;
> +
> +	/* rotation and horizontal flip not supported for partial prefetch */
> +	if (rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 |
> +			DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X))
> +		return false;
> +
> +	afbc = modifier & DRM_FORMAT_MOD_ARM_AFBC(0);
> +	sparse = modifier & AFBC_FORMAT_MOD_SPARSE;
> +
> +	switch (format) {
> +	case DRM_FORMAT_ARGB2101010:
> +	case DRM_FORMAT_RGBA1010102:
> +	case DRM_FORMAT_BGRA1010102:
> +	case DRM_FORMAT_ARGB8888:
> +	case DRM_FORMAT_RGBA8888:
> +	case DRM_FORMAT_BGRA8888:
> +	case DRM_FORMAT_XRGB8888:
> +	case DRM_FORMAT_XBGR8888:
> +	case DRM_FORMAT_RGBX8888:
> +	case DRM_FORMAT_BGRX8888:
> +	case DRM_FORMAT_RGB888:
> +	case DRM_FORMAT_RGBA5551:
> +	case DRM_FORMAT_RGB565:
> +		/* always supported */
> +		return true;
> +
> +	case DRM_FORMAT_ABGR2101010:
> +	case DRM_FORMAT_ABGR8888:
> +	case DRM_FORMAT_ABGR1555:
> +	case DRM_FORMAT_BGR565:
> +		/* supported, but if AFBC then must be sparse mode */
> +		return (!afbc) || (afbc && sparse);
> +
> +	case DRM_FORMAT_BGR888:
> +		/* supported, but not for AFBC */
> +		return !afbc;
> +
> +	case DRM_FORMAT_YUYV:
> +	case DRM_FORMAT_UYVY:
> +	case DRM_FORMAT_NV12:
> +	case DRM_FORMAT_YUV420:
> +		/* not supported */
> +		return false;
> +
> +	default:
> +		return false;
> +	}
> +}
> +
> +/*
> + * Select the preferred MMU prefetch mode. Full-frame prefetch is preferred as
> + * long as the framebuffer is all large pages. Otherwise partial-frame prefetch
> + * is selected as long as it is supported for the current format. The selected
> + * page size for prefetch is returned in pgsize_bitmap.
> + */
> +static enum mmu_prefetch_mode malidp_mmu_prefetch_select_mode
> +		(struct malidp_plane_state *ms,	u32 *pgsize_bitmap)
> +{
> +	u32 pgsizes;
> +
> +	/* get the full-frame prefetch page size(s) supported by the MMU */
> +	pgsizes = *pgsize_bitmap & MALIDP_MMU_PREFETCH_FULL_PGSIZES;
> +
> +	while (pgsizes) {
> +		u32 largest_pgsize = 1 << __fls(pgsizes);
> +
> +		if (malidp_check_pages_threshold(ms, largest_pgsize)) {
> +			*pgsize_bitmap = largest_pgsize;
> +			return MALIDP_PREFETCH_MODE_FULL;
> +		}
> +
> +		pgsizes -= largest_pgsize;
> +	}
> +
> +	/* get the partial-frame prefetch page size(s) supported by the MMU */
> +	pgsizes = *pgsize_bitmap & MALIDP_MMU_PREFETCH_PARTIAL_PGSIZES;
> +
> +	if (malidp_partial_prefetch_supported(ms->base.fb->format->format,
> +					      ms->base.fb->modifier,
> +					      ms->base.rotation)) {
> +		/* partial prefetch using the smallest page size */
> +		*pgsize_bitmap = 1 << __ffs(pgsizes);
> +		return MALIDP_PREFETCH_MODE_PARTIAL;
> +	}
> +	*pgsize_bitmap = 0;
> +	return MALIDP_PREFETCH_MODE_NONE;
> +}
> +
> +static u32 malidp_calc_mmu_control_value(enum mmu_prefetch_mode mode,
> +					 u8 readahead, u8 n_planes, u32 pgsize)
> +{
> +	u32 mmu_ctrl = 0;
> +
> +	if (mode != MALIDP_PREFETCH_MODE_NONE) {
> +		mmu_ctrl |= MALIDP_MMU_CTRL_EN;
> +
> +		if (mode == MALIDP_PREFETCH_MODE_PARTIAL) {
> +			mmu_ctrl |= MALIDP_MMU_CTRL_MODE;
> +			mmu_ctrl |= MALIDP_MMU_CTRL_PP_NUM_REQ(readahead);
> +		}
> +
> +		if (pgsize == SZ_64K || pgsize == SZ_2M) {
> +			int i;
> +
> +			for (i = 0; i < n_planes; i++)
> +				mmu_ctrl |= MALIDP_MMU_CTRL_PX_PS(i);
> +		}
> +	}
> +
> +	return mmu_ctrl;
> +}
> +
> +static void malidp_de_prefetch_settings(struct malidp_plane *mp,
> +					struct malidp_plane_state *ms)
> +{
> +	if (!mp->layer->mmu_ctrl_offset)
> +		return;
> +
> +	/* get the page sizes supported by the MMU */
> +	ms->mmu_prefetch_pgsize = malidp_get_pgsize_bitmap(mp);
> +	ms->mmu_prefetch_mode  =
> +		malidp_mmu_prefetch_select_mode(ms, &ms->mmu_prefetch_pgsize);
> +}
> +
> +static void malidp_de_set_mmu_control(struct malidp_plane *mp,
> +				      struct malidp_plane_state *ms)
> +{
> +	u32 mmu_ctrl;
> +
> +	/* check hardware supports MMU prefetch */
> +	if (!mp->layer->mmu_ctrl_offset)
> +		return;
> +
> +	mmu_ctrl = malidp_calc_mmu_control_value(ms->mmu_prefetch_mode,
> +						 MALIDP_MMU_PREFETCH_READAHEAD,
> +						 ms->n_planes,
> +						 ms->mmu_prefetch_pgsize);
> +
> +	malidp_hw_write(mp->hwdev, mmu_ctrl,
> +			mp->layer->base + mp->layer->mmu_ctrl_offset);
> +}
> +
>  static void malidp_de_plane_update(struct drm_plane *plane,
>  				   struct drm_plane_state *old_state)
>  {
> @@ -351,6 +581,9 @@ static void malidp_de_plane_update(struct drm_plane *plane,
>  		malidp_hw_write(mp->hwdev, lower_32_bits(fb_addr), ptr);
>  		malidp_hw_write(mp->hwdev, upper_32_bits(fb_addr), ptr + 4);
>  	}
> +
> +	malidp_de_set_mmu_control(mp, ms);
> +
>  	malidp_de_set_plane_pitches(mp, ms->n_planes,
>  				    plane->state->fb->pitches);
>  
> diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
> index 3579d36b2a71..c60fff4b2aaf 100644
> --- a/drivers/gpu/drm/arm/malidp_regs.h
> +++ b/drivers/gpu/drm/arm/malidp_regs.h
> @@ -245,6 +245,17 @@
>  #define MALIDP550_CONFIG_VALID		0x0c014
>  #define MALIDP550_CONFIG_ID		0x0ffd4
>  
> +/* register offsets specific to DP650 */
> +#define MALIDP650_DE_LV_MMU_CTRL	0x000D0
> +#define MALIDP650_DE_LG_MMU_CTRL	0x00048
> +#define MALIDP650_DE_LS_MMU_CTRL	0x00078
> +
> +/* bit masks to set the MMU control register */
> +#define MALIDP_MMU_CTRL_EN		(1 << 0)
> +#define MALIDP_MMU_CTRL_MODE		(1 << 4)
> +#define MALIDP_MMU_CTRL_PX_PS(x)	(1 << (8 + (x)))
> +#define MALIDP_MMU_CTRL_PP_NUM_REQ(x)	(((x) & 0x7f) << 12)
> +
>  /*
>   * Starting with DP550 the register map blocks has been standardised to the
>   * following layout:
> -- 
> 2.18.0
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index e3eb0cb1f385..2d824558216f 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -53,6 +53,13 @@  struct malidp_plane {
 	struct drm_plane base;
 	struct malidp_hw_device *hwdev;
 	const struct malidp_layer *layer;
+
+};
+
+enum mmu_prefetch_mode {
+	MALIDP_PREFETCH_MODE_NONE,
+	MALIDP_PREFETCH_MODE_PARTIAL,
+	MALIDP_PREFETCH_MODE_FULL,
 };
 
 struct malidp_plane_state {
@@ -63,6 +70,8 @@  struct malidp_plane_state {
 	/* internal format ID */
 	u8 format;
 	u8 n_planes;
+	enum mmu_prefetch_mode mmu_prefetch_mode;
+	u32 mmu_prefetch_pgsize;
 };
 
 #define to_malidp_plane(x) container_of(x, struct malidp_plane, base)
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index c94a4422e0e9..4233be1c5709 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -96,6 +96,19 @@  static const struct malidp_layer malidp550_layers[] = {
 	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE, 0 },
 };
 
+static const struct malidp_layer malidp650_layers[] = {
+	{ DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE,
+		MALIDP_DE_LV_STRIDE0, MALIDP550_LV_YUV2RGB,
+		MALIDP650_DE_LV_MMU_CTRL },
+	{ DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE,
+		MALIDP_DE_LG_STRIDE, 0, MALIDP650_DE_LG_MMU_CTRL },
+	{ DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE,
+		MALIDP_DE_LV_STRIDE0, MALIDP550_LV_YUV2RGB,
+		MALIDP650_DE_LV_MMU_CTRL },
+	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE,
+		MALIDP550_DE_LS_R1_STRIDE, 0, MALIDP650_DE_LS_MMU_CTRL },
+};
+
 #define SE_N_SCALING_COEFFS	96
 static const u16 dp500_se_scaling_coeffs[][SE_N_SCALING_COEFFS] = {
 	[MALIDP_UPSCALING_COEFFS - 1] = {
@@ -832,8 +845,8 @@  const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = {
 			.dc_base = MALIDP550_DC_BASE,
 			.out_depth_base = MALIDP550_DE_OUTPUT_DEPTH,
 			.features = MALIDP_REGMAP_HAS_CLEARIRQ,
-			.n_layers = ARRAY_SIZE(malidp550_layers),
-			.layers = malidp550_layers,
+			.n_layers = ARRAY_SIZE(malidp650_layers),
+			.layers = malidp650_layers,
 			.de_irq_map = {
 				.irq_mask = MALIDP_DE_IRQ_UNDERRUN |
 					    MALIDP650_DE_IRQ_DRIFT |
diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h
index ad2e96915d44..39b92a9fb810 100644
--- a/drivers/gpu/drm/arm/malidp_hw.h
+++ b/drivers/gpu/drm/arm/malidp_hw.h
@@ -62,6 +62,7 @@  struct malidp_layer {
 	u16 ptr;		/* address offset for the pointer register */
 	u16 stride_offset;	/* offset to the first stride register. */
 	s16 yuv2rgb_offset;	/* offset to the YUV->RGB matrix entries */
+	u16 mmu_ctrl_offset;    /* offset to the MMU control register */
 };
 
 enum malidp_scaling_coeff_set {
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 29409a65d864..99c487be9a1d 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -10,11 +10,14 @@ 
  * ARM Mali DP plane manipulation routines.
  */
 
+#include <linux/iommu.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_print.h>
 
@@ -56,6 +59,13 @@ 
  */
 #define MALIDP_ALPHA_LUT 0xffaa5500
 
+/* page sizes the MMU prefetcher can support */
+#define MALIDP_MMU_PREFETCH_PARTIAL_PGSIZES	(SZ_4K | SZ_64K)
+#define MALIDP_MMU_PREFETCH_FULL_PGSIZES	(SZ_1M | SZ_2M)
+
+/* readahead for partial-frame prefetch */
+#define MALIDP_MMU_PREFETCH_READAHEAD		8
+
 static void malidp_de_plane_destroy(struct drm_plane *plane)
 {
 	struct malidp_plane *mp = to_malidp_plane(plane);
@@ -103,6 +113,9 @@  drm_plane_state *malidp_duplicate_plane_state(struct drm_plane *plane)
 	state->format = m_state->format;
 	state->n_planes = m_state->n_planes;
 
+	state->mmu_prefetch_mode = m_state->mmu_prefetch_mode;
+	state->mmu_prefetch_pgsize = m_state->mmu_prefetch_pgsize;
+
 	return &state->base;
 }
 
@@ -115,6 +128,12 @@  static void malidp_destroy_plane_state(struct drm_plane *plane,
 	kfree(m_state);
 }
 
+static const char * const prefetch_mode_names[] = {
+	[MALIDP_PREFETCH_MODE_NONE] = "MMU_PREFETCH_NONE",
+	[MALIDP_PREFETCH_MODE_PARTIAL] = "MMU_PREFETCH_PARTIAL",
+	[MALIDP_PREFETCH_MODE_FULL] = "MMU_PREFETCH_FULL",
+};
+
 static void malidp_plane_atomic_print_state(struct drm_printer *p,
 					    const struct drm_plane_state *state)
 {
@@ -123,6 +142,9 @@  static void malidp_plane_atomic_print_state(struct drm_printer *p,
 	drm_printf(p, "\trotmem_size=%u\n", ms->rotmem_size);
 	drm_printf(p, "\tformat_id=%u\n", ms->format);
 	drm_printf(p, "\tn_planes=%u\n", ms->n_planes);
+	drm_printf(p, "\tmmu_prefetch_mode=%s\n",
+		   prefetch_mode_names[ms->mmu_prefetch_mode]);
+	drm_printf(p, "\tmmu_prefetch_pgsize=%d\n", ms->mmu_prefetch_pgsize);
 }
 
 static const struct drm_plane_funcs malidp_de_plane_funcs = {
@@ -176,6 +198,9 @@  static int malidp_se_check_scaling(struct malidp_plane *mp,
 	return 0;
 }
 
+static void malidp_de_prefetch_settings(struct malidp_plane *mp,
+					struct malidp_plane_state *ms);
+
 static int malidp_de_plane_check(struct drm_plane *plane,
 				 struct drm_plane_state *state)
 {
@@ -245,6 +270,7 @@  static int malidp_de_plane_check(struct drm_plane *plane,
 		ms->rotmem_size = val;
 	}
 
+	malidp_de_prefetch_settings(mp, ms);
 	return 0;
 }
 
@@ -321,6 +347,210 @@  static void malidp_de_set_color_encoding(struct malidp_plane *plane,
 	}
 }
 
+/* Page sizes of the device's IOMMU domain; 0 when no IOMMU/domain is found. */
+static u32 malidp_get_pgsize_bitmap(struct malidp_plane *mp)
+{
+	struct iommu_domain *domain;
+
+	if (!iommu_present(&platform_bus_type))
+		return 0;
+
+	domain = iommu_get_domain_for_dev(mp->base.dev->dev);
+	if (!domain)
+		return 0;
+
+	return domain->pgsize_bitmap;
+}
+
+/*
+ * Check if the framebuffer is entirely made up of pages at least pgsize in
+ * size. Only a heuristic: assumes that each scatterlist entry has been aligned
+ * to the largest page size smaller than its length and that the MMU maps to
+ * the largest page size possible.
+ */
+static bool malidp_check_pages_threshold(struct malidp_plane_state *ms,
+					 u32 pgsize)
+{
+	int i;
+
+	for (i = 0; i < ms->n_planes; i++) {
+		struct drm_gem_object *obj;
+		struct sg_table *sgt;
+		struct scatterlist *sgl;
+		bool too_small = false;
+
+		obj = drm_gem_fb_get_obj(ms->base.fb, i);
+		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
+		if (!sgt)
+			return false;
+
+		for (sgl = sgt->sgl; sgl && !too_small; sgl = sg_next(sgl))
+			too_small = sgl->length < pgsize;
+
+		/*
+		 * Release the scatterlist entries as well as the table itself;
+		 * kfree() on its own would leak the entries.
+		 */
+		sg_free_table(sgt);
+		kfree(sgt);
+
+		if (too_small)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Check if it is possible to enable partial-frame MMU prefetch for a
+ * framebuffer, given its pixel format, its format modifier (AFBC state) and
+ * the rotation applied to the plane.
+ * Returns true when partial-frame prefetch can be used, false otherwise.
+ */
+static bool malidp_partial_prefetch_supported(u32 format, u64 modifier,
+					      unsigned int rotation)
+{
+	const unsigned int unsupported_rotation =
+		DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 |
+		DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X;
+	bool is_afbc = modifier & DRM_FORMAT_MOD_ARM_AFBC(0);
+	bool is_sparse = modifier & AFBC_FORMAT_MOD_SPARSE;
+
+	/* partial prefetch cannot handle rotation or horizontal flip */
+	if (rotation & unsupported_rotation)
+		return false;
+
+	switch (format) {
+	/* usable with or without AFBC */
+	case DRM_FORMAT_ARGB2101010:
+	case DRM_FORMAT_RGBA1010102:
+	case DRM_FORMAT_BGRA1010102:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_RGBA8888:
+	case DRM_FORMAT_BGRA8888:
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_RGBX8888:
+	case DRM_FORMAT_BGRX8888:
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_RGBA5551:
+	case DRM_FORMAT_RGB565:
+		return true;
+
+	/* usable, but an AFBC buffer must be in sparse mode */
+	case DRM_FORMAT_ABGR2101010:
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_ABGR1555:
+	case DRM_FORMAT_BGR565:
+		return !is_afbc || is_sparse;
+
+	/* usable for non-AFBC buffers only */
+	case DRM_FORMAT_BGR888:
+		return !is_afbc;
+
+	/* explicitly listed as unsupported; same answer as any other format */
+	case DRM_FORMAT_YUYV:
+	case DRM_FORMAT_UYVY:
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_YUV420:
+	default:
+		return false;
+	}
+}
+
+/*
+ * Select the preferred MMU prefetch mode. Full-frame prefetch is preferred as
+ * long as the framebuffer is all large pages. Otherwise partial-frame prefetch
+ * is selected if the MMU has a suitable page size and the current format
+ * supports it. The selected page size is returned in pgsize_bitmap.
+ */
+static enum mmu_prefetch_mode malidp_mmu_prefetch_select_mode
+		(struct malidp_plane_state *ms, u32 *pgsize_bitmap)
+{
+	u32 pgsizes;
+
+	/* get the full-frame prefetch page size(s) supported by the MMU */
+	pgsizes = *pgsize_bitmap & MALIDP_MMU_PREFETCH_FULL_PGSIZES;
+
+	while (pgsizes) {
+		u32 largest_pgsize = 1 << __fls(pgsizes);
+
+		if (malidp_check_pages_threshold(ms, largest_pgsize)) {
+			*pgsize_bitmap = largest_pgsize;
+			return MALIDP_PREFETCH_MODE_FULL;
+		}
+
+		pgsizes -= largest_pgsize;
+	}
+
+	/* get the partial-frame prefetch page size(s) supported by the MMU */
+	pgsizes = *pgsize_bitmap & MALIDP_MMU_PREFETCH_PARTIAL_PGSIZES;
+	/* use the smallest one; __ffs(0) is undefined, so require a match */
+	if (pgsizes &&
+	    malidp_partial_prefetch_supported(ms->base.fb->format->format,
+					      ms->base.fb->modifier,
+					      ms->base.rotation)) {
+		*pgsize_bitmap = 1 << __ffs(pgsizes);
+		return MALIDP_PREFETCH_MODE_PARTIAL;
+	}
+	*pgsize_bitmap = 0;
+	return MALIDP_PREFETCH_MODE_NONE;
+}
+
+/* Build the MMU control register value for a prefetch mode and page size. */
+static u32 malidp_calc_mmu_control_value(enum mmu_prefetch_mode mode,
+					 u8 readahead, u8 n_planes, u32 pgsize)
+{
+	u32 val = MALIDP_MMU_CTRL_EN;
+	int plane;
+
+	if (mode == MALIDP_PREFETCH_MODE_NONE)
+		return 0;
+
+	if (mode == MALIDP_PREFETCH_MODE_PARTIAL)
+		val |= MALIDP_MMU_CTRL_MODE |
+		       MALIDP_MMU_CTRL_PP_NUM_REQ(readahead);
+
+	/* the per-plane page size bits are only set for 64K and 2M pages */
+	if (pgsize != SZ_64K && pgsize != SZ_2M)
+		return val;
+
+	for (plane = 0; plane < n_planes; plane++)
+		val |= MALIDP_MMU_CTRL_PX_PS(plane);
+
+	return val;
+}
+
+/* Work out the MMU prefetch mode and page size for a plane state. */
+static void malidp_de_prefetch_settings(struct malidp_plane *mp,
+					struct malidp_plane_state *ms)
+{
+	/* nothing to compute for a layer without an MMU control register */
+	if (mp->layer->mmu_ctrl_offset) {
+		ms->mmu_prefetch_pgsize = malidp_get_pgsize_bitmap(mp);
+		ms->mmu_prefetch_mode = malidp_mmu_prefetch_select_mode(ms,
+						&ms->mmu_prefetch_pgsize);
+	}
+}
+
+/* Program the layer's MMU control register, if the layer has one. */
+static void malidp_de_set_mmu_control(struct malidp_plane *mp,
+				      struct malidp_plane_state *ms)
+{
+	const struct malidp_layer *layer = mp->layer;
+	u32 value;
+
+	if (layer->mmu_ctrl_offset == 0)
+		return;
+
+	value = malidp_calc_mmu_control_value(ms->mmu_prefetch_mode,
+					      MALIDP_MMU_PREFETCH_READAHEAD,
+					      ms->n_planes,
+					      ms->mmu_prefetch_pgsize);
+	malidp_hw_write(mp->hwdev, value,
+			layer->base + layer->mmu_ctrl_offset);
+}
+
 static void malidp_de_plane_update(struct drm_plane *plane,
 				   struct drm_plane_state *old_state)
 {
@@ -351,6 +581,9 @@  static void malidp_de_plane_update(struct drm_plane *plane,
 		malidp_hw_write(mp->hwdev, lower_32_bits(fb_addr), ptr);
 		malidp_hw_write(mp->hwdev, upper_32_bits(fb_addr), ptr + 4);
 	}
+
+	malidp_de_set_mmu_control(mp, ms);
+
 	malidp_de_set_plane_pitches(mp, ms->n_planes,
 				    plane->state->fb->pitches);
 
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
index 3579d36b2a71..c60fff4b2aaf 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -245,6 +245,17 @@ 
 #define MALIDP550_CONFIG_VALID		0x0c014
 #define MALIDP550_CONFIG_ID		0x0ffd4
 
+/* MMU control register offsets, relative to the layer base (DP650 only) */
+#define MALIDP650_DE_LV_MMU_CTRL	0x000D0
+#define MALIDP650_DE_LG_MMU_CTRL	0x00048
+#define MALIDP650_DE_LS_MMU_CTRL	0x00078
+
+/* bit fields of the MMU control register */
+#define MALIDP_MMU_CTRL_EN		(1 << 0)
+#define MALIDP_MMU_CTRL_MODE		(1 << 4)
+#define MALIDP_MMU_CTRL_PX_PS(x)	(1 << (8 + (x)))
+#define MALIDP_MMU_CTRL_PP_NUM_REQ(x)	(((x) & 0x7f) << 12)
+
 /*
  * Starting with DP550 the register map blocks has been standardised to the
  * following layout: