
[v4,4/8] drm/i915/selftest_migrate: Check CCS meta data clear

Message ID 20220319204229.9846-5-ramalingam.c@intel.com (mailing list archive)
State New, archived
Series drm/i915/ttm: Evict and restore of compressed object

Commit Message

Ramalingam C March 19, 2022, 8:42 p.m. UTC
While clearing a Flat-CCS capable lmem object, we also need to clear the
CCS metadata corresponding to that memory.

As part of live_migrate_clear, add a check that the CCS metadata is
cleared for Flat-CCS capable lmem objects.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
 drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++---
 2 files changed, 278 insertions(+), 28 deletions(-)
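
In outline, the extended clear() selftest seeds the object with a known
pattern, pushes that pattern into the CCS surface, runs the clear under
test, and then reads the CCS surface back to check that it was cleared
as well. A condensed sketch of that flow (error handling, request waits
and the ww retry loop elided; not compilable on its own):

    /* Seed lmem with a pattern so stale CCS state is detectable. */
    for (i = 0; i < sz / sizeof(u32); i++)
            vaddr[i] = ~i;
    i915_gem_object_flush_map(obj);

    /* Push the object data into the CCS surface (write_to_ccs = true). */
    intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl,
                           obj->cache_level, true, &rq);

    /* Run the clear under test. */
    err = fn(migrate, &ww, obj, val, &rq);

    /* Main memory must now read back as 'val' everywhere. Then read
     * the CCS surface back out (write_to_ccs = false) and require
     * every CCS word to be zero.
     */
    intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl,
                           obj->cache_level, false, &rq);
    for (i = 0; i < GET_CCS_BYTES(i915, sz) / sizeof(u32); i++)
            if (vaddr[i])
                    err = -EINVAL;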

Comments

kernel test robot March 20, 2022, 1:39 a.m. UTC | #1
Hi Ramalingam,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm/drm-next drm-tip/drm-tip next-20220318]
[cannot apply to v5.17-rc8]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ramalingam-C/drm-i915-ttm-Evict-and-restore-of-compressed-object/20220320-044242
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20220320/202203200912.4mqFVTe9-lkp@intel.com/config)
compiler: gcc-9 (Ubuntu 9.4.0-1ubuntu1~20.04) 9.4.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/afd58bdbf43437bf72ff2313776c3036ebf99a11
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ramalingam-C/drm-i915-ttm-Evict-and-restore-of-compressed-object/20220320-044242
        git checkout afd58bdbf43437bf72ff2313776c3036ebf99a11
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:29,
                    from arch/x86/include/asm/percpu.h:27,
                    from arch/x86/include/asm/current.h:6,
                    from arch/x86/include/asm/processor.h:17,
                    from arch/x86/include/asm/kvm_para.h:5,
                    from arch/x86/include/asm/hypervisor.h:37,
                    from drivers/gpu/drm/i915/i915_drv.h:35,
                    from drivers/gpu/drm/i915/gt/intel_migrate.c:6:
   drivers/gpu/drm/i915/gt/selftest_migrate.c: In function 'clear':
>> include/linux/kern_levels.h:5:18: error: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'unsigned int' [-Werror=format=]
       5 | #define KERN_SOH "\001"  /* ASCII Start Of Header */
         |                  ^~~~~~
   include/linux/printk.h:418:11: note: in definition of macro 'printk_index_wrap'
     418 |   _p_func(_fmt, ##__VA_ARGS__);    \
         |           ^~~~
   include/linux/printk.h:489:2: note: in expansion of macro 'printk'
     489 |  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
         |  ^~~~~~
   include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH'
      11 | #define KERN_ERR KERN_SOH "3" /* error conditions */
         |                  ^~~~~~~~
   include/linux/printk.h:489:9: note: in expansion of macro 'KERN_ERR'
     489 |  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
         |         ^~~~~~~~
   drivers/gpu/drm/i915/gt/selftest_migrate.c:403:6: note: in expansion of macro 'pr_err'
     403 |      pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
         |      ^~~~~~
   In file included from drivers/gpu/drm/i915/gt/intel_migrate.c:1014:
   drivers/gpu/drm/i915/gt/selftest_migrate.c:403:52: note: format string is defined here
     403 |      pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
         |                                                  ~~^
         |                                                    |
         |                                                    long unsigned int
         |                                                  %u
   drivers/gpu/drm/i915/gt/intel_migrate.c: In function 'intel_context_copy_ccs':
>> drivers/gpu/drm/i915/gt/selftest_migrate.c:157:19: error: 'rq' is used uninitialized in this function [-Werror=uninitialized]
     157 |  offset += (u64)rq->engine->instance << 32;
         |                 ~~^~~~~~~~
   cc1: all warnings being treated as errors
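
The first error is an i386-only format mismatch: 'ccs_bytes' is an int
and sizeof(u32) a size_t, so 'ccs_bytes / sizeof(u32)' promotes to
'unsigned long' on 64-bit builds but only to 'unsigned int' on 32-bit
ones, which '%lu' then rejects. One possible fix (an untested sketch)
is to make the argument width explicit:

    pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
           fn, i, (unsigned long)(ccs_bytes / sizeof(u32)) - 1);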


vim +/rq +157 drivers/gpu/drm/i915/gt/selftest_migrate.c

   134	
   135	static int intel_context_copy_ccs(struct intel_context *ce,
   136					  const struct i915_deps *deps,
   137					  struct scatterlist *sg,
   138					  enum i915_cache_level cache_level,
   139					  bool write_to_ccs,
   140					  struct i915_request **out)
   141	{
   142		u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
   143		u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
   144		struct sgt_dma it = sg_sgt(sg);
   145		struct i915_request *rq;
   146		u32 offset;
   147		int err;
   148	
   149		GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
   150		*out = NULL;
   151	
   152		GEM_BUG_ON(ce->ring->size < SZ_64K);
   153	
   154		offset = 0;
   155		if (HAS_64K_PAGES(ce->engine->i915))
   156			offset = CHUNK_SZ;
 > 157		offset += (u64)rq->engine->instance << 32;
   158	
   159		do {
   160			int len;
   161	
   162			rq = i915_request_create(ce);
   163			if (IS_ERR(rq)) {
   164				err = PTR_ERR(rq);
   165				goto out_ce;
   166			}
   167	
   168			if (deps) {
   169				err = i915_request_await_deps(rq, deps);
   170				if (err)
   171					goto out_rq;
   172	
   173				if (rq->engine->emit_init_breadcrumb) {
   174					err = rq->engine->emit_init_breadcrumb(rq);
   175					if (err)
   176						goto out_rq;
   177				}
   178	
   179				deps = NULL;
   180			}
   181	
   182			/* The PTE updates + clear must not be interrupted. */
   183			err = emit_no_arbitration(rq);
   184			if (err)
   185				goto out_rq;
   186	
   187			len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
   188			if (len <= 0) {
   189				err = len;
   190				goto out_rq;
   191			}
   192	
   193			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
   194			if (err)
   195				goto out_rq;
   196	
   197			err = emit_copy_ccs(rq, offset, dst_access,
   198					    offset, src_access, len);
   199			if (err)
   200				goto out_rq;
   201	
   202			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
   203						     MI_FLUSH_DW_CCS);
   204	
   205			/* Arbitration is re-enabled between requests. */
   206	out_rq:
   207			if (*out)
   208				i915_request_put(*out);
   209			*out = i915_request_get(rq);
   210			i915_request_add(rq);
   211			if (err || !it.sg || !sg_dma_len(it.sg))
   212				break;
   213	
   214			cond_resched();
   215		} while (1);
   216	
   217	out_ce:
   218		return err;
   219	}
   220
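
The second error is also genuine: line 157 dereferences 'rq' before the
first i915_request_create() in the loop ever assigns it. A hedged sketch
of one possible fix, deriving the instance from 'ce' (which is valid at
that point; the GEM_BUG_ON above already dereferences ce->engine) and
widening 'offset' so the high bits of the shift are not truncated:

    u64 offset = 0;

    if (HAS_64K_PAGES(ce->engine->i915))
            offset = CHUNK_SZ;
    offset += (u64)ce->engine->instance << 32;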
Hellstrom, Thomas March 21, 2022, 10:39 a.m. UTC | #2
On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote:
> While clearing a Flat-CCS capable lmem object, we also need to clear
> the CCS metadata corresponding to that memory.
> 
> As part of live_migrate_clear, add a check that the CCS metadata is
> cleared for Flat-CCS capable lmem objects.
> 
> Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
>  drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++---
>  2 files changed, 278 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index c1db8daf994a..bbfea570c239 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
>         return cmd;
>  }
>  
> +static int emit_copy_ccs(struct i915_request *rq,
> +                        u32 dst_offset, u8 dst_access,
> +                        u32 src_offset, u8 src_access, int size)
> +{
> +       struct drm_i915_private *i915 = rq->engine->i915;
> +       int mocs = rq->engine->gt->mocs.uc_index << 1;
> +       u32 num_ccs_blks, ccs_ring_size;
> +       u32 *cs;
> +
> +       ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
> +       WARN_ON(!ccs_ring_size);
> +
> +       cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
> +       if (IS_ERR(cs))
> +               return PTR_ERR(cs);
> +
> +       num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
> +                                   NUM_CCS_BYTES_PER_BLOCK);
> +
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset,
> +                                     src_access, dst_access,
> +                                     mocs, mocs, num_ccs_blks);
> +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> +       if (ccs_ring_size & 1)
> +               *cs++ = MI_NOOP;
> +
> +       intel_ring_advance(rq, cs);
> +
> +       return 0;
> +}


This would be an unused function if selftests are not configured,
right?


> +
>  static int emit_copy(struct i915_request *rq,
>                      u32 dst_offset, u32 src_offset, int size)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> index b5da8b8cd039..e32cc994f4a2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate,
>         return err;
>  }
>  
> +static int intel_context_copy_ccs(struct intel_context *ce,
> +                                 const struct i915_deps *deps,
> +                                 struct scatterlist *sg,
> +                                 enum i915_cache_level cache_level,
> +                                 bool write_to_ccs,
> +                                 struct i915_request **out)
> +{
> +       u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
> +       u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
> +       struct sgt_dma it = sg_sgt(sg);
> +       struct i915_request *rq;
> +       u32 offset;
> +       int err;
> +
> +       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
> +       *out = NULL;
> +
> +       GEM_BUG_ON(ce->ring->size < SZ_64K);
> +
> +       offset = 0;
> +       if (HAS_64K_PAGES(ce->engine->i915))
> +               offset = CHUNK_SZ;
> +       offset += (u64)rq->engine->instance << 32;
> +
> +       do {
> +               int len;
> +
> +               rq = i915_request_create(ce);
> +               if (IS_ERR(rq)) {
> +                       err = PTR_ERR(rq);
> +                       goto out_ce;
> +               }
> +
> +               if (deps) {
> +                       err = i915_request_await_deps(rq, deps);
> +                       if (err)
> +                               goto out_rq;
> +
> +                       if (rq->engine->emit_init_breadcrumb) {
> +                               err = rq->engine->emit_init_breadcrumb(rq);
> +                               if (err)
> +                                       goto out_rq;
> +                       }
> +
> +                       deps = NULL;
> +               }
> +
> +               /* The PTE updates + clear must not be interrupted. */
> +               err = emit_no_arbitration(rq);
> +               if (err)
> +                       goto out_rq;
> +
> +               len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
> +               if (len <= 0) {
> +                       err = len;
> +                       goto out_rq;
> +               }
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = emit_copy_ccs(rq, offset, dst_access,
> +                                   offset, src_access, len);
> +               if (err)
> +                       goto out_rq;
> +
> +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
> +                                            MI_FLUSH_DW_CCS);
> +
> +               /* Arbitration is re-enabled between requests. */
> +out_rq:
> +               if (*out)
> +                       i915_request_put(*out);
> +               *out = i915_request_get(rq);
> +               i915_request_add(rq);
> +               if (err || !it.sg || !sg_dma_len(it.sg))
> +                       break;
> +
> +               cond_resched();
> +       } while (1);
> +
> +out_ce:
> +       return err;
> +}
> +
> +static int
> +intel_migrate_ccs_copy(struct intel_migrate *m,
> +                      struct i915_gem_ww_ctx *ww,
> +                      const struct i915_deps *deps,
> +                      struct scatterlist *sg,
> +                      enum i915_cache_level cache_level,
> +                      bool write_to_ccs,
> +                      struct i915_request **out)
> +{
> +       struct intel_context *ce;
> +       int err;
> +
> +       *out = NULL;
> +       if (!m->context)
> +               return -ENODEV;
> +
> +       ce = intel_migrate_create_context(m);
> +       if (IS_ERR(ce))
> +               ce = intel_context_get(m->context);
> +       GEM_BUG_ON(IS_ERR(ce));
> +
> +       err = intel_context_pin_ww(ce, ww);
> +       if (err)
> +               goto out;
> +
> +       err = intel_context_copy_ccs(ce, deps, sg, cache_level,
> +                                    write_to_ccs, out);
> +
> +       intel_context_unpin(ce);
> +out:
> +       intel_context_put(ce);
> +       return err;
> +}
> +
>  static int clear(struct intel_migrate *migrate,
>                  int (*fn)(struct intel_migrate *migrate,
>                            struct i915_gem_ww_ctx *ww,
> @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate,
>         struct drm_i915_gem_object *obj;
>         struct i915_request *rq;
>         struct i915_gem_ww_ctx ww;
> -       u32 *vaddr;
> +       u32 *vaddr, val = 0;
> +       bool ccs_cap = false;
>         int err = 0;
>         int i;
>  
> @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate,
>         /* Consider the rounded up memory too */
>         sz = obj->base.size;
>  
> +       if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
> +               ccs_cap = true;
> +
>         for_i915_gem_ww(&ww, err, true) {
> +               int ccs_bytes;
> +
>                 err = i915_gem_object_lock(obj, &ww);
>                 if (err)
>                         continue;
> @@ -170,44 +296,136 @@ static int clear(struct intel_migrate *migrate,
>                         vaddr[i] = ~i;
>                 i915_gem_object_flush_map(obj);
>  
> -               err = fn(migrate, &ww, obj, sz, &rq);
> -               if (!err)
> -                       continue;
> +               if (ccs_cap && !val) {
> +                       /* Write the obj data into ccs surface */
> +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL,
> +                                                    obj->mm.pages->sgl,
> +                                                    obj->cache_level,
> +                                                    true, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> +                                       pr_err("%ps timed out, size: %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = 0x5a5a5a5a;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl,
> +                                                    obj->cache_level, false, &rq);

Why do we read back CCS content here?

> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> +                                       pr_err("%ps timed out, size: %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes; i += 4) {
> +                               if (vaddr[i] != ~i) {
> +                                       pr_err("%ps ccs write and read failed, offset: %d\n",
> +                                              fn, i);
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       i915_gem_object_flush_map(obj);
> +               }
>  
> -               if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
> -                       pr_err("%ps failed, size: %u\n", fn, sz);
> -               if (rq) {
> -                       i915_request_wait(rq, 0, HZ);
> +               err = fn(migrate, &ww, obj, val, &rq);
> +               if (rq && !err) {
> +                       if (i915_request_wait(rq, 0, HZ) < 0) {
> +                               pr_err("%ps timed out, size: %u\n", fn, sz);
> +                               err = -ETIME;
> +                       }
>                         i915_request_put(rq);
> +                       rq = NULL;
>                 }
> -               i915_gem_object_unpin_map(obj);
> -       }
> -       if (err)
> -               goto err_out;
> +               if (err)
> +                       continue;
>  
> -       if (rq) {
> -               if (i915_request_wait(rq, 0, HZ) < 0) {
> -                       pr_err("%ps timed out, size: %u\n", fn, sz);
> -                       err = -ETIME;
> +               i915_gem_object_flush_map(obj);
> +
> +               /* Verify the set/clear of the obj mem */
> +               for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> +                       int x = i * 1024 +
> +                               i915_prandom_u32_max_state(1024, prng);
> +
> +                       if (vaddr[x] != val) {
> +                               pr_err("%ps failed, (%u != %u), offset: %zu\n",
> +                                      fn, vaddr[x], val,  x * sizeof(u32));
> +                               igt_hexdump(vaddr + i * 1024, 4096);
> +                               err = -EINVAL;
> +                       }
>                 }
> -               i915_request_put(rq);
> -       }
> +               if (err)
> +                       continue;
>  
> -       for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> -               int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
> +               if (ccs_cap && !val) {
> +                       for (i = 0; i < sz / sizeof(u32); i++)
> +                               vaddr[i] = ~i;
> +                       i915_gem_object_flush_map(obj);
> +
> +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL,
> +                                                    obj->mm.pages->sgl,
> +                                                    obj->cache_level,
> +                                                    false, &rq);
> +                       if (rq && !err) {
> +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> +                                       pr_err("%ps timed out, size: %u\n",
> +                                              fn, sz);
> +                                       err = -ETIME;
> +                               }
> +                               i915_request_put(rq);
> +                               rq = NULL;
> +                       }
> +                       if (err)
> +                               continue;
> +
> +                       ccs_bytes = GET_CCS_BYTES(i915, sz);
> +                       i915_gem_object_flush_map(obj);
> +                       for (i = 0; !err && i < ccs_bytes / sizeof(u32); i++) {
> +                               if (vaddr[i]) {

I think this is incorrect. This assumes that CCS data is read back
contiguously for the whole buffer, but instead CCS data is read back
per 8MiB chunk and placed at the beginning of each chunk?

/Thomas
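
If that reading is right, a per-chunk verification would look roughly
like the following hypothetical sketch, which assumes every
CHUNK_SZ-sized chunk carries its own CCS bytes at the start of the
chunk:

    for (j = 0; j < sz / CHUNK_SZ; j++) {
            u32 *chunk = vaddr + j * (CHUNK_SZ / sizeof(u32));
            u32 ccs_words = GET_CCS_BYTES(i915, CHUNK_SZ) / sizeof(u32);

            for (i = 0; i < ccs_words; i++)
                    if (chunk[i])
                            err = -EINVAL; /* CCS for this chunk not cleared */
    }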



> +                                       pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
> +                                              fn, i, (ccs_bytes / sizeof(u32)) -  1);
> +                                       igt_hexdump(vaddr + i, ccs_bytes - i * sizeof(u32));
> +                                       err = -EINVAL;
> +                               }
> +                       }
> +                       if (err)
> +                               continue;
> +               }
> +               i915_gem_object_unpin_map(obj);
> +       }
>  
> -               if (vaddr[x] != sz) {
> -                       pr_err("%ps failed, size: %u, offset: %zu\n",
> -                              fn, sz, x * sizeof(u32));
> -                       igt_hexdump(vaddr + i * 1024, 4096);
> -                       err = -EINVAL;
> +       if (err) {
> +               if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
> +                       pr_err("%ps failed, size: %u\n", fn, sz);
> +               if (rq && err != -EINVAL) {
> +                       i915_request_wait(rq, 0, HZ);
> +                       i915_request_put(rq);
>                 }
> +
> +               i915_gem_object_unpin_map(obj);
> +       } else {
> +               pr_debug("%ps Passed. size: %u\n", fn, sz);
>         }
>  
> -       i915_gem_object_unpin_map(obj);
> -err_out:
>         i915_gem_object_put(obj);
> -
>         return err;
>  }
>  

Ramalingam C March 21, 2022, 11:05 p.m. UTC | #3
On 2022-03-21 at 16:09:08 +0530, Hellstrom, Thomas wrote:
> On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote:
> > While clearing a Flat-CCS capable lmem object, we also need to clear
> > the CCS metadata corresponding to that memory.
> >
> > As part of live_migrate_clear, add a check that the CCS metadata is
> > cleared for Flat-CCS capable lmem objects.
> >
> > Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_migrate.c    |  32 +++
> >  drivers/gpu/drm/i915/gt/selftest_migrate.c | 274 ++++++++++++++++++---
> >  2 files changed, 278 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
> > index c1db8daf994a..bbfea570c239 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> > @@ -572,6 +572,38 @@ static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
> >         return cmd;
> >  }
> >
> > +static int emit_copy_ccs(struct i915_request *rq,
> > +                        u32 dst_offset, u8 dst_access,
> > +                        u32 src_offset, u8 src_access, int size)
> > +{
> > +       struct drm_i915_private *i915 = rq->engine->i915;
> > +       int mocs = rq->engine->gt->mocs.uc_index << 1;
> > +       u32 num_ccs_blks, ccs_ring_size;
> > +       u32 *cs;
> > +
> > +       ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
> > +       WARN_ON(!ccs_ring_size);
> > +
> > +       cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
> > +       if (IS_ERR(cs))
> > +               return PTR_ERR(cs);
> > +
> > +       num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
> > +                                   NUM_CCS_BYTES_PER_BLOCK);
> > +
> > +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> > +       cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset,
> > +                                     src_access, dst_access,
> > +                                     mocs, mocs, num_ccs_blks);
> > +       cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
> > +       if (ccs_ring_size & 1)
> > +               *cs++ = MI_NOOP;
> > +
> > +       intel_ring_advance(rq, cs);
> > +
> > +       return 0;
> > +}
> 
> 
> This would be an unused function if selftests are not configured,
> right?
No, Thomas. This is reused between the selftest and the eviction flow;
in the next version I am reusing it for evict_clear too.

> 
> 
> > +
> >  static int emit_copy(struct i915_request *rq,
> >                      u32 dst_offset, u32 src_offset, int size)
> >  {
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> > index b5da8b8cd039..e32cc994f4a2 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> > @@ -132,6 +132,126 @@ static int copy(struct intel_migrate *migrate,
> >         return err;
> >  }
> >
> > +static int intel_context_copy_ccs(struct intel_context *ce,
> > +                                 const struct i915_deps *deps,
> > +                                 struct scatterlist *sg,
> > +                                 enum i915_cache_level cache_level,
> > +                                 bool write_to_ccs,
> > +                                 struct i915_request **out)
> > +{
> > +       u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
> > +       u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
> > +       struct sgt_dma it = sg_sgt(sg);
> > +       struct i915_request *rq;
> > +       u32 offset;
> > +       int err;
> > +
> > +       GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
> > +       *out = NULL;
> > +
> > +       GEM_BUG_ON(ce->ring->size < SZ_64K);
> > +
> > +       offset = 0;
> > +       if (HAS_64K_PAGES(ce->engine->i915))
> > +               offset = CHUNK_SZ;
> > +       offset += (u64)rq->engine->instance << 32;
> > +
> > +       do {
> > +               int len;
> > +
> > +               rq = i915_request_create(ce);
> > +               if (IS_ERR(rq)) {
> > +                       err = PTR_ERR(rq);
> > +                       goto out_ce;
> > +               }
> > +
> > +               if (deps) {
> > +                       err = i915_request_await_deps(rq, deps);
> > +                       if (err)
> > +                               goto out_rq;
> > +
> > +                       if (rq->engine->emit_init_breadcrumb) {
> > +                               err = rq->engine->emit_init_breadcrumb(rq);
> > +                               if (err)
> > +                                       goto out_rq;
> > +                       }
> > +
> > +                       deps = NULL;
> > +               }
> > +
> > +               /* The PTE updates + clear must not be interrupted. */
> > +               err = emit_no_arbitration(rq);
> > +               if (err)
> > +                       goto out_rq;
> > +
> > +               len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
> > +               if (len <= 0) {
> > +                       err = len;
> > +                       goto out_rq;
> > +               }
> > +
> > +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> > +               if (err)
> > +                       goto out_rq;
> > +
> > +               err = emit_copy_ccs(rq, offset, dst_access,
> > +                                   offset, src_access, len);
> > +               if (err)
> > +                       goto out_rq;
> > +
> > +               err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
> > +                                            MI_FLUSH_DW_CCS);
> > +
> > +               /* Arbitration is re-enabled between requests. */
> > +out_rq:
> > +               if (*out)
> > +                       i915_request_put(*out);
> > +               *out = i915_request_get(rq);
> > +               i915_request_add(rq);
> > +               if (err || !it.sg || !sg_dma_len(it.sg))
> > +                       break;
> > +
> > +               cond_resched();
> > +       } while (1);
> > +
> > +out_ce:
> > +       return err;
> > +}
> > +
> > +static int
> > +intel_migrate_ccs_copy(struct intel_migrate *m,
> > +                      struct i915_gem_ww_ctx *ww,
> > +                      const struct i915_deps *deps,
> > +                      struct scatterlist *sg,
> > +                      enum i915_cache_level cache_level,
> > +                      bool write_to_ccs,
> > +                      struct i915_request **out)
> > +{
> > +       struct intel_context *ce;
> > +       int err;
> > +
> > +       *out = NULL;
> > +       if (!m->context)
> > +               return -ENODEV;
> > +
> > +       ce = intel_migrate_create_context(m);
> > +       if (IS_ERR(ce))
> > +               ce = intel_context_get(m->context);
> > +       GEM_BUG_ON(IS_ERR(ce));
> > +
> > +       err = intel_context_pin_ww(ce, ww);
> > +       if (err)
> > +               goto out;
> > +
> > +       err = intel_context_copy_ccs(ce, deps, sg, cache_level,
> > +                                    write_to_ccs, out);
> > +
> > +       intel_context_unpin(ce);
> > +out:
> > +       intel_context_put(ce);
> > +       return err;
> > +}
> > +
> >  static int clear(struct intel_migrate *migrate,
> >                  int (*fn)(struct intel_migrate *migrate,
> >                            struct i915_gem_ww_ctx *ww,
> > @@ -144,7 +264,8 @@ static int clear(struct intel_migrate *migrate,
> >         struct drm_i915_gem_object *obj;
> >         struct i915_request *rq;
> >         struct i915_gem_ww_ctx ww;
> > -       u32 *vaddr;
> > +       u32 *vaddr, val = 0;
> > +       bool ccs_cap = false;
> >         int err = 0;
> >         int i;
> >
> > @@ -155,7 +276,12 @@ static int clear(struct intel_migrate *migrate,
> >         /* Consider the rounded up memory too */
> >         sz = obj->base.size;
> >
> > +       if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
> > +               ccs_cap = true;
> > +
> >         for_i915_gem_ww(&ww, err, true) {
> > +               int ccs_bytes;
> > +
> >                 err = i915_gem_object_lock(obj, &ww);
> >                 if (err)
> >                         continue;
> > @@ -170,44 +296,136 @@ static int clear(struct intel_migrate *migrate,
> >                         vaddr[i] = ~i;
> >                 i915_gem_object_flush_map(obj);
> >
> > -               err = fn(migrate, &ww, obj, sz, &rq);
> > -               if (!err)
> > -                       continue;
> > +               if (ccs_cap && !val) {
> > +                       /* Write the obj data into ccs surface */
> > +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL,
> > +                                                    obj->mm.pages->sgl,
> > +                                                    obj->cache_level,
> > +                                                    true, &rq);
> > +                       if (rq && !err) {
> > +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> > +                                       pr_err("%ps timed out, size: %u\n",
> > +                                              fn, sz);
> > +                                       err = -ETIME;
> > +                               }
> > +                               i915_request_put(rq);
> > +                               rq = NULL;
> > +                       }
> > +                       if (err)
> > +                               continue;
> > +
> > +                       for (i = 0; i < sz / sizeof(u32); i++)
> > +                               vaddr[i] = 0x5a5a5a5a;
> > +                       i915_gem_object_flush_map(obj);
> > +
> > +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl,
> > +                                                    obj->cache_level, false, &rq);
> 
> Why do we read back CCS content here?
This was rechecking the CCS copy, but it is not needed for the real
intention of the test. Removing it in the next version.
> 
> > +                       if (rq && !err) {
> > +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> > +                                       pr_err("%ps timed out, size: %u\n",
> > +                                              fn, sz);
> > +                                       err = -ETIME;
> > +                               }
> > +                               i915_request_put(rq);
> > +                               rq = NULL;
> > +                       }
> > +                       if (err)
> > +                               continue;
> > +
> > +                       i915_gem_object_flush_map(obj);
> > +                       for (i = 0; !err && i < ccs_bytes; i += 4) {
> > +                               if (vaddr[i] != ~i) {
> > +                                       pr_err("%ps ccs write and read failed, offset: %d\n",
> > +                                              fn, i);
> > +                                       err = -EINVAL;
> > +                               }
> > +                       }
> > +                       if (err)
> > +                               continue;
> > +
> > +                       i915_gem_object_flush_map(obj);
> > +               }
> >
> > -               if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
> > -                       pr_err("%ps failed, size: %u\n", fn, sz);
> > -               if (rq) {
> > -                       i915_request_wait(rq, 0, HZ);
> > +               err = fn(migrate, &ww, obj, val, &rq);
> > +               if (rq && !err) {
> > +                       if (i915_request_wait(rq, 0, HZ) < 0) {
> > +                               pr_err("%ps timed out, size: %u\n", fn, sz);
> > +                               err = -ETIME;
> > +                       }
> >                         i915_request_put(rq);
> > +                       rq = NULL;
> >                 }
> > -               i915_gem_object_unpin_map(obj);
> > -       }
> > -       if (err)
> > -               goto err_out;
> > +               if (err)
> > +                       continue;
> >
> > -       if (rq) {
> > -               if (i915_request_wait(rq, 0, HZ) < 0) {
> > -                       pr_err("%ps timed out, size: %u\n", fn, sz);
> > -                       err = -ETIME;
> > +               i915_gem_object_flush_map(obj);
> > +
> > +               /* Verify the set/clear of the obj mem */
> > +               for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> > +                       int x = i * 1024 +
> > +                               i915_prandom_u32_max_state(1024, prng);
> > +
> > +                       if (vaddr[x] != val) {
> > +                               pr_err("%ps failed, (%u != %u), offset: %zu\n",
> > +                                      fn, vaddr[x], val,  x * sizeof(u32));
> > +                               igt_hexdump(vaddr + i * 1024, 4096);
> > +                               err = -EINVAL;
> > +                       }
> >                 }
> > -               i915_request_put(rq);
> > -       }
> > +               if (err)
> > +                       continue;
> >
> > -       for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
> > -               int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
> > +               if (ccs_cap && !val) {
> > +                       for (i = 0; i < sz / sizeof(u32); i++)
> > +                               vaddr[i] = ~i;
> > +                       i915_gem_object_flush_map(obj);
> > +
> > +                       err = intel_migrate_ccs_copy(migrate, &ww, NULL,
> > +                                                    obj->mm.pages->sgl,
> > +                                                    obj->cache_level,
> > +                                                    false, &rq);
> > +                       if (rq && !err) {
> > +                               if (i915_request_wait(rq, 0, HZ) < 0) {
> > +                                       pr_err("%ps timed out, size: %u\n",
> > +                                              fn, sz);
> > +                                       err = -ETIME;
> > +                               }
> > +                               i915_request_put(rq);
> > +                               rq = NULL;
> > +                       }
> > +                       if (err)
> > +                               continue;
> > +
> > +                       ccs_bytes = GET_CCS_BYTES(i915, sz);
> > +                       i915_gem_object_flush_map(obj);
> > +                       for (i = 0; !err && i < ccs_bytes / sizeof(u32); i++) {
> > +                               if (vaddr[i]) {
> 
> I think this is incorrect. This assumes that CCS data is read back
> contiguously for the whole buffer, but instead CCS data is read back
> per 8MiB chunk and placed at the beginning of each chunk?

Yes. This is the source of the problem I was discussing with you. I have
fixed it in the next version; please share your feedback. I could have
used a dedicated object for the CCS data, but instead just calculated
the offset of the CCS bytes.

Ram
> 
> /Thomas
> 
> 
> 
> > +                                       pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
> > +                                              fn, i, (ccs_bytes / sizeof(u32)) -  1);
> > +                                       igt_hexdump(vaddr + i, ccs_bytes - i * sizeof(u32));
> > +                                       err = -EINVAL;
> > +                               }
> > +                       }
> > +                       if (err)
> > +                               continue;
> > +               }
> > +               i915_gem_object_unpin_map(obj);
> > +       }
> >
> > -               if (vaddr[x] != sz) {
> > -                       pr_err("%ps failed, size: %u, offset: %zu\n",
> > -                              fn, sz, x * sizeof(u32));
> > -                       igt_hexdump(vaddr + i * 1024, 4096);
> > -                       err = -EINVAL;
> > +       if (err) {
> > +               if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
> > +                       pr_err("%ps failed, size: %u\n", fn, sz);
> > +               if (rq && err != -EINVAL) {
> > +                       i915_request_wait(rq, 0, HZ);
> > +                       i915_request_put(rq);
> >                 }
> > +
> > +               i915_gem_object_unpin_map(obj);
> > +       } else {
> > +               pr_debug("%ps Passed. size: %u\n", fn, sz);
> >         }
> >
> > -       i915_gem_object_unpin_map(obj);
> > -err_out:
> >         i915_gem_object_put(obj);
> > -
> >         return err;
> >  }
> >
>

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index c1db8daf994a..bbfea570c239 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -572,6 +572,38 @@  static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
 	return cmd;
 }
 
+static int emit_copy_ccs(struct i915_request *rq,
+			 u32 dst_offset, u8 dst_access,
+			 u32 src_offset, u8 src_access, int size)
+{
+	struct drm_i915_private *i915 = rq->engine->i915;
+	int mocs = rq->engine->gt->mocs.uc_index << 1;
+	u32 num_ccs_blks, ccs_ring_size;
+	u32 *cs;
+
+	ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
+	WARN_ON(!ccs_ring_size);
+
+	cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
+				    NUM_CCS_BYTES_PER_BLOCK);
+
+	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
+	cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset,
+				      src_access, dst_access,
+				      mocs, mocs, num_ccs_blks);
+	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
+	if (ccs_ring_size & 1)
+		*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
 static int emit_copy(struct i915_request *rq,
 		     u32 dst_offset, u32 src_offset, int size)
 {
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index b5da8b8cd039..e32cc994f4a2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -132,6 +132,126 @@  static int copy(struct intel_migrate *migrate,
 	return err;
 }
 
+static int intel_context_copy_ccs(struct intel_context *ce,
+				  const struct i915_deps *deps,
+				  struct scatterlist *sg,
+				  enum i915_cache_level cache_level,
+				  bool write_to_ccs,
+				  struct i915_request **out)
+{
+	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
+	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
+	struct sgt_dma it = sg_sgt(sg);
+	struct i915_request *rq;
+	u32 offset;
+	int err;
+
+	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
+	*out = NULL;
+
+	GEM_BUG_ON(ce->ring->size < SZ_64K);
+
+	offset = 0;
+	if (HAS_64K_PAGES(ce->engine->i915))
+		offset = CHUNK_SZ;
+	offset += (u64)rq->engine->instance << 32;
+
+	do {
+		int len;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_ce;
+		}
+
+		if (deps) {
+			err = i915_request_await_deps(rq, deps);
+			if (err)
+				goto out_rq;
+
+			if (rq->engine->emit_init_breadcrumb) {
+				err = rq->engine->emit_init_breadcrumb(rq);
+				if (err)
+					goto out_rq;
+			}
+
+			deps = NULL;
+		}
+
+		/* The PTE updates + clear must not be interrupted. */
+		err = emit_no_arbitration(rq);
+		if (err)
+			goto out_rq;
+
+		len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
+		if (len <= 0) {
+			err = len;
+			goto out_rq;
+		}
+
+		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+		if (err)
+			goto out_rq;
+
+		err = emit_copy_ccs(rq, offset, dst_access,
+				    offset, src_access, len);
+		if (err)
+			goto out_rq;
+
+		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE |
+					     MI_FLUSH_DW_CCS);
+
+		/* Arbitration is re-enabled between requests. */
+out_rq:
+		if (*out)
+			i915_request_put(*out);
+		*out = i915_request_get(rq);
+		i915_request_add(rq);
+		if (err || !it.sg || !sg_dma_len(it.sg))
+			break;
+
+		cond_resched();
+	} while (1);
+
+out_ce:
+	return err;
+}
+
+static int
+intel_migrate_ccs_copy(struct intel_migrate *m,
+		       struct i915_gem_ww_ctx *ww,
+		       const struct i915_deps *deps,
+		       struct scatterlist *sg,
+		       enum i915_cache_level cache_level,
+		       bool write_to_ccs,
+		       struct i915_request **out)
+{
+	struct intel_context *ce;
+	int err;
+
+	*out = NULL;
+	if (!m->context)
+		return -ENODEV;
+
+	ce = intel_migrate_create_context(m);
+	if (IS_ERR(ce))
+		ce = intel_context_get(m->context);
+	GEM_BUG_ON(IS_ERR(ce));
+
+	err = intel_context_pin_ww(ce, ww);
+	if (err)
+		goto out;
+
+	err = intel_context_copy_ccs(ce, deps, sg, cache_level,
+				     write_to_ccs, out);
+
+	intel_context_unpin(ce);
+out:
+	intel_context_put(ce);
+	return err;
+}
+
 static int clear(struct intel_migrate *migrate,
 		 int (*fn)(struct intel_migrate *migrate,
 			   struct i915_gem_ww_ctx *ww,
@@ -144,7 +264,8 @@  static int clear(struct intel_migrate *migrate,
 	struct drm_i915_gem_object *obj;
 	struct i915_request *rq;
 	struct i915_gem_ww_ctx ww;
-	u32 *vaddr;
+	u32 *vaddr, val = 0;
+	bool ccs_cap = false;
 	int err = 0;
 	int i;
 
@@ -155,7 +276,12 @@  static int clear(struct intel_migrate *migrate,
 	/* Consider the rounded up memory too */
 	sz = obj->base.size;
 
+	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
+		ccs_cap = true;
+
 	for_i915_gem_ww(&ww, err, true) {
+		int ccs_bytes;
+
 		err = i915_gem_object_lock(obj, &ww);
 		if (err)
 			continue;
@@ -170,44 +296,136 @@  static int clear(struct intel_migrate *migrate,
 			vaddr[i] = ~i;
 		i915_gem_object_flush_map(obj);
 
-		err = fn(migrate, &ww, obj, sz, &rq);
-		if (!err)
-			continue;
+		if (ccs_cap && !val) {
+			/* Write the obj data into ccs surface */
+			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+						     obj->mm.pages->sgl,
+						     obj->cache_level,
+						     true, &rq);
+			if (rq && !err) {
+				if (i915_request_wait(rq, 0, HZ) < 0) {
+					pr_err("%ps timed out, size: %u\n",
+					       fn, sz);
+					err = -ETIME;
+				}
+				i915_request_put(rq);
+				rq = NULL;
+			}
+			if (err)
+				continue;
+
+			for (i = 0; i < sz / sizeof(u32); i++)
+				vaddr[i] = 0x5a5a5a5a;
+			i915_gem_object_flush_map(obj);
+
+			err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl,
+						     obj->cache_level, false, &rq);
+			if (rq && !err) {
+				if (i915_request_wait(rq, 0, HZ) < 0) {
+					pr_err("%ps timed out, size: %u\n",
+					       fn, sz);
+					err = -ETIME;
+				}
+				i915_request_put(rq);
+				rq = NULL;
+			}
+			if (err)
+				continue;
+
+			i915_gem_object_flush_map(obj);
+			for (i = 0; !err && i < ccs_bytes; i += 4) {
+				if (vaddr[i] != ~i) {
+					pr_err("%ps ccs write and read failed, offset: %d\n",
+					       fn, i);
+					err = -EINVAL;
+				}
+			}
+			if (err)
+				continue;
+
+			i915_gem_object_flush_map(obj);
+		}
 
-		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
-			pr_err("%ps failed, size: %u\n", fn, sz);
-		if (rq) {
-			i915_request_wait(rq, 0, HZ);
+		err = fn(migrate, &ww, obj, val, &rq);
+		if (rq && !err) {
+			if (i915_request_wait(rq, 0, HZ) < 0) {
+				pr_err("%ps timed out, size: %u\n", fn, sz);
+				err = -ETIME;
+			}
 			i915_request_put(rq);
+			rq = NULL;
 		}
-		i915_gem_object_unpin_map(obj);
-	}
-	if (err)
-		goto err_out;
+		if (err)
+			continue;
 
-	if (rq) {
-		if (i915_request_wait(rq, 0, HZ) < 0) {
-			pr_err("%ps timed out, size: %u\n", fn, sz);
-			err = -ETIME;
+		i915_gem_object_flush_map(obj);
+
+		/* Verify the set/clear of the obj mem */
+		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
+			int x = i * 1024 +
+				i915_prandom_u32_max_state(1024, prng);
+
+			if (vaddr[x] != val) {
+				pr_err("%ps failed, (%u != %u), offset: %zu\n",
+				       fn, vaddr[x], val,  x * sizeof(u32));
+				igt_hexdump(vaddr + i * 1024, 4096);
+				err = -EINVAL;
+			}
 		}
-		i915_request_put(rq);
-	}
+		if (err)
+			continue;
 
-	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
-		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
+		if (ccs_cap && !val) {
+			for (i = 0; i < sz / sizeof(u32); i++)
+				vaddr[i] = ~i;
+			i915_gem_object_flush_map(obj);
+
+			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+						     obj->mm.pages->sgl,
+						     obj->cache_level,
+						     false, &rq);
+			if (rq && !err) {
+				if (i915_request_wait(rq, 0, HZ) < 0) {
+					pr_err("%ps timed out, size: %u\n",
+					       fn, sz);
+					err = -ETIME;
+				}
+				i915_request_put(rq);
+				rq = NULL;
+			}
+			if (err)
+				continue;
+
+			ccs_bytes = GET_CCS_BYTES(i915, sz);
+			i915_gem_object_flush_map(obj);
+			for (i = 0; !err && i < ccs_bytes / sizeof(u32); i++) {
+				if (vaddr[i]) {
+					pr_err("%ps ccs clearing failed, offset: %d/%lu\n",
+					       fn, i, (ccs_bytes / sizeof(u32)) -  1);
+					igt_hexdump(vaddr + i, ccs_bytes - i * sizeof(u32));
+					err = -EINVAL;
+				}
+			}
+			if (err)
+				continue;
+		}
+		i915_gem_object_unpin_map(obj);
+	}
 
-		if (vaddr[x] != sz) {
-			pr_err("%ps failed, size: %u, offset: %zu\n",
-			       fn, sz, x * sizeof(u32));
-			igt_hexdump(vaddr + i * 1024, 4096);
-			err = -EINVAL;
+	if (err) {
+		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
+			pr_err("%ps failed, size: %u\n", fn, sz);
+		if (rq && err != -EINVAL) {
+			i915_request_wait(rq, 0, HZ);
+			i915_request_put(rq);
 		}
+
+		i915_gem_object_unpin_map(obj);
+	} else {
+		pr_debug("%ps Passed. size: %u\n", fn, sz);
 	}
 
-	i915_gem_object_unpin_map(obj);
-err_out:
 	i915_gem_object_put(obj);
-
 	return err;
 }
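
As a rough sense of scale for emit_copy_ccs() above (assuming the
Flat-CCS ratio of one CCS byte per 256 bytes of main memory that
GET_CCS_BYTES() encodes, and NUM_CCS_BYTES_PER_BLOCK == 256): one
8MiB CHUNK_SZ worth of lmem yields

    GET_CCS_BYTES = 8MiB / 256               = 32KiB of CCS data
    num_ccs_blks  = DIV_ROUND_UP(32KiB, 256) = 128 blocks

covered by a single XY_CTRL_SURF_COPY_BLT instruction.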