diff mbox series

[v3,2/6] drm/i915/gt: Clear compress metadata for Flat-ccs objects

Message ID 20220307134038.30525-3-ramalingam.c@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/ttm: Evict and restore of compressed object | expand

Commit Message

Ramalingam C March 7, 2022, 1:40 p.m. UTC
Xe-HP and later devices support Flat CCS, which reserves a portion of
the device memory to store compression metadata. While clearing a
device memory buffer object we also need to clear the associated
CCS buffer.

The XY_FAST_COLOR_BLT cmd provides an option to clear the CCS metadata
corresponding to the main memory that is cleared. So on Flat-CCS capable
platforms we use this option to clear the CCS metadata along with main
memory.

v2: Fixed issues with platform naming [Lucas]
v3: Rebased [Ram]
    Used the round_up funcs [Bob]
v4: Fixed ccs blk calculation [Ram]
    Added Kdoc on flat-ccs.
v5: GENMASK is used [Matt]
    mocs fix [Matt]
    Comments Fix [Matt]
    Flush address programming [Ram]
v6: FLUSH_DW is fixed
    Few coding style fix
v7: Adopting the XY_FAST_COLOR_BLT [Thomas]

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c      | 39 ++++++++++++++++++--
 2 files changed, 39 insertions(+), 3 deletions(-)

Comments

Hellstrom, Thomas March 7, 2022, 2:32 p.m. UTC | #1
On Mon, 2022-03-07 at 19:10 +0530, Ramalingam C wrote:
> Xe-HP and latest devices support Flat CCS which reserved a portion of
> the device memory to store compression metadata, during the clearing
> of
> device memory buffer object we also need to clear the associated
> CCS buffer.
> 
> XY_FAST_COLOR_BLT cmd provides a option to clear the ccs metadata
> corresponding to the main memory that is cleared. So on Flat-CCS
> capable
> platform we use this option to clear the CCS meta data along with
> main
> memory.
> 
> v2: Fixed issues with platform naming [Lucas]
> v3: Rebased [Ram]
>     Used the round_up funcs [Bob]
> v4: Fixed ccs blk calculation [Ram]
>     Added Kdoc on flat-ccs.
> v5: GENMASK is used [Matt]
>     mocs fix [Matt]
>     Comments Fix [Matt]
>     Flush address programming [Ram]
> v6: FLUSH_DW is fixed
>     Few coding style fix
> v7: Adopting the XY_FAST_COLOR_BLT (Thomas]
> 
> Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c      | 39
> ++++++++++++++++++--
>  2 files changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 925e55b6a94f..34cead49f35e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -207,8 +207,11 @@
>  #define XY_COLOR_BLT_CMD               (2 << 29 | 0x50 << 22)
>  #define XY_FAST_COLOR_BLT_CMD          (2 << 29 | 0x44 << 22)
>  #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
> +#define   FAST_CLEAR_0                 (2 << 12)
>  #define   XY_FAST_COLOR_BLT_DW         16
>  #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
> +#define   XY_FAST_COLOR_BLT_AUX_MASK   GENMASK(20, 18)
> +#define   XY_FAST_COLOR_BLT_AUX_CCS_E  5
>  #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  #define SRC_COPY_BLT_CMD               (2 << 29 | 0x43 << 22)
>  #define GEN9_XY_FAST_COPY_BLT_CMD      (2 << 29 | 0x42 << 22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index cb68f7bf6b28..05262f1b438e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -469,6 +469,34 @@ static bool wa_1209644611_applies(int ver, u32
> size)
>         return height % 4 == 3 && height <= 8;
>  }
>  
> +/**
> + * DOC: Flat-CCS - Memory compression for Local memory
> + *
> + * On Xe-HP and later devices, we use dedicated compression control
> state (CCS)
> + * stored in local memory for each surface, to support the 3D and
> media
> + * compression formats.
> + *
> + * The memory required for the CCS of the entire local memory is
> 1/256 of the
> + * local memory size. So before the kernel boot, the required memory
> is reserved
> + * for the CCS data and a secure register will be programmed with
> the CCS base
> + * address.
> + *
> + * Flat CCS data needs to be cleared when a lmem object is
> allocated.
> + * And CCS data can be copied in and out of CCS region through
> + * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
> + *
> + * When we exhaust the lmem, if the object's placements support
> smem, then we can
> + * directly decompress the compressed lmem object into smem and
> start using it
> + * from smem itself.
> + *
> + * But when we need to swapout the compressed lmem object into a
> smem region
> + * though objects' placement doesn't support smem, then we copy the
> lmem content
> + * as it is into smem region along with ccs data (using
> XY_CTRL_SURF_COPY_BLT).
> + * When the object is referred, lmem content will be swaped in along
> with
> + * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at
> corresponding
> + * location.
> + */
> +
>  static int emit_copy(struct i915_request *rq,
>                      u32 dst_offset, u32 src_offset, int size)
>  {
> @@ -621,8 +649,8 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
>  {
>         struct drm_i915_private *i915 = rq->engine->i915;
>         int mocs = rq->engine->gt->mocs.uc_index << 1;
> +       u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
>         const int ver = GRAPHICS_VER(i915);
> -       u32 *cs, mem_type = 0;
>         int ring_sz;
>  
>         GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> @@ -644,10 +672,15 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
>                 return PTR_ERR(cs);
>  
>         if (ver >= 12) {
> +               if (HAS_FLAT_CCS(i915)) {
> +                       spl_mode = FAST_CLEAR_0;
> +                       aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,

Did you have a chance to verify that this actually works, and whether
setting aux will clear just the CCS data or both CCS & main DATA?

If so,
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>



> +                                       
> XY_FAST_COLOR_BLT_AUX_CCS_E);
> +               }
>                 *cs++ = XY_FAST_COLOR_BLT_CMD |
> XY_FAST_COLOR_BLT_DEPTH_32 |
> -                       (XY_FAST_COLOR_BLT_DW - 2);
> +                       spl_mode | (XY_FAST_COLOR_BLT_DW - 2);
>                 *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs)
> |
> -                       (PAGE_SIZE - 1);
> +                       (PAGE_SIZE - 1) | aux;
>                 *cs++ = 0;
>                 *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
>                 *cs++ = lower_32_bits(offset);

----------------------------------------------------------------------
Intel Sweden AB
Registered Office: Isafjordsgatan 30B, 164 40 Kista, Stockholm, Sweden
Registration Number: 556189-6027

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 925e55b6a94f..34cead49f35e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -207,8 +207,11 @@ 
 #define XY_COLOR_BLT_CMD		(2 << 29 | 0x50 << 22)
 #define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
 #define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
+#define   FAST_CLEAR_0			(2 << 12)
 #define   XY_FAST_COLOR_BLT_DW		16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_AUX_MASK	GENMASK(20, 18)
+#define   XY_FAST_COLOR_BLT_AUX_CCS_E	5
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 #define SRC_COPY_BLT_CMD		(2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index cb68f7bf6b28..05262f1b438e 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -469,6 +469,34 @@  static bool wa_1209644611_applies(int ver, u32 size)
 	return height % 4 == 3 && height <= 8;
 }
 
+/**
+ * DOC: Flat-CCS - Memory compression for Local memory
+ *
+ * On Xe-HP and later devices, we use dedicated compression control state (CCS)
+ * stored in local memory for each surface, to support the 3D and media
+ * compression formats.
+ *
+ * The memory required for the CCS of the entire local memory is 1/256 of the
+ * local memory size. So before the kernel boot, the required memory is reserved
+ * for the CCS data and a secure register will be programmed with the CCS base
+ * address.
+ *
+ * Flat CCS data needs to be cleared when a lmem object is allocated.
+ * And CCS data can be copied in and out of CCS region through
+ * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
+ *
+ * When we exhaust the lmem, if the object's placements support smem, then we can
+ * directly decompress the compressed lmem object into smem and start using it
+ * from smem itself.
+ *
+ * But when we need to swap out the compressed lmem object into a smem region
+ * even though the object's placements don't support smem, we copy the lmem
+ * content as it is into the smem region along with the CCS data (using
+ * XY_CTRL_SURF_COPY_BLT). When the object is referenced again, the lmem
+ * content is swapped back in and the CCS data is restored (using
+ * XY_CTRL_SURF_COPY_BLT) at the corresponding location.
+ */
+
 static int emit_copy(struct i915_request *rq,
 		     u32 dst_offset, u32 src_offset, int size)
 {
@@ -621,8 +649,8 @@  static int emit_clear(struct i915_request *rq, u64 offset, int size,
 {
 	struct drm_i915_private *i915 = rq->engine->i915;
 	int mocs = rq->engine->gt->mocs.uc_index << 1;
+	u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
 	const int ver = GRAPHICS_VER(i915);
-	u32 *cs, mem_type = 0;
 	int ring_sz;
 
 	GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
@@ -644,10 +672,15 @@  static int emit_clear(struct i915_request *rq, u64 offset, int size,
 		return PTR_ERR(cs);
 
 	if (ver >= 12) {
+		if (HAS_FLAT_CCS(i915)) {
+			spl_mode = FAST_CLEAR_0;
+			aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,
+					 XY_FAST_COLOR_BLT_AUX_CCS_E);
+		}
 		*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
-			(XY_FAST_COLOR_BLT_DW - 2);
+			spl_mode | (XY_FAST_COLOR_BLT_DW - 2);
 		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-			(PAGE_SIZE - 1);
+			(PAGE_SIZE - 1) | aux;
 		*cs++ = 0;
 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
 		*cs++ = lower_32_bits(offset);