[v2,2/8] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry

Message ID	20211203122426.2859679-3-matthew.auld@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: Matthew Auld <matthew.auld@intel.com> To: intel-gfx@lists.freedesktop.org Subject: [PATCH v2 2/8] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry Date: Fri, 3 Dec 2021 12:24:20 +0000 Message-Id: <20211203122426.2859679-3-matthew.auld@intel.com> In-Reply-To: <20211203122426.2859679-1-matthew.auld@intel.com> References: <20211203122426.2859679-1-matthew.auld@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precedence: list Cc: bob.beckett@collabora.com, =?utf-8?q?Thomas_Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>, adrian.larumbe@collabora.com, dri-devel@lists.freedesktop.org Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Series	DG2 accelerated migration/clearing support | expand [v2,0/8] DG2 accelerated migration/clearing support [v2,1/8] drm/i915/migrate: don't check the scratch page [v2,2/8] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry [v2,3/8] drm/i915/gtt: add gtt mappable plumbing [v2,4/8] drm/i915/migrate: fix offset calculation [v2,5/8] drm/i915/migrate: fix length calculation [v2,6/8] drm/i915/selftests: handle object rounding [v2,7/8] drm/i915/migrate: add acceleration support for DG2 [v2,8/8] drm/i915/migrate: turn on acceleration for DG2

Message ID

20211203122426.2859679-3-matthew.auld@intel.com (mailing list archive)

State

New, archived

Headers

From: Matthew Auld <matthew.auld@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v2 2/8] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry
Date: Fri,  3 Dec 2021 12:24:20 +0000
Message-Id: <20211203122426.2859679-3-matthew.auld@intel.com>
In-Reply-To: <20211203122426.2859679-1-matthew.auld@intel.com>
References: <20211203122426.2859679-1-matthew.auld@intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Precedence: list
Cc: bob.beckett@collabora.com,
 =?utf-8?q?Thomas_Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>,
 adrian.larumbe@collabora.com, dri-devel@lists.freedesktop.org
Errors-To: dri-devel-bounces@lists.freedesktop.org
Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>

Series

DG2 accelerated migration/clearing support | expand

Commit Message

Matthew Auld Dec. 3, 2021, 12:24 p.m. UTC

If this is LMEM then we get a 32 entry PT, with each PTE pointing to
some 64K block of memory; otherwise it's just the usual 512 entry PT.
This very much assumes the caller knows what they are doing.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 50 ++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

Comments

Ramalingam C Dec. 3, 2021, 4:59 p.m. UTC | #1

On 2021-12-03 at 12:24:20 +0000, Matthew Auld wrote:
> If this is LMEM then we get a 32 entry PT, with each PTE pointing to
> some 64K block of memory, otherwise it's just the usual 512 entry PT.
> This very much assumes the caller knows what they are doing.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Ramalingam C <ramalingam.c@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 50 ++++++++++++++++++++++++++--
>  1 file changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index bd3ca0996a23..312b2267bf87 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -728,13 +728,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>  		gen8_pdp_for_page_index(vm, idx);
>  	struct i915_page_directory *pd =
>  		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
>  	gen8_pte_t *vaddr;
>  
> -	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
> +	GEM_BUG_ON(pt->is_compact);

Do we have compact PT for smem with 64k pages?

> +
> +	vaddr = px_vaddr(pt);
>  	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>  	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
>  }
>  
> +static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
> +					    dma_addr_t addr,
> +					    u64 offset,
> +					    enum i915_cache_level level,
> +					    u32 flags)
> +{
> +	u64 idx = offset >> GEN8_PTE_SHIFT;
> +	struct i915_page_directory * const pdp =
> +		gen8_pdp_for_page_index(vm, idx);
> +	struct i915_page_directory *pd =
> +		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
> +	gen8_pte_t *vaddr;
> +
> +	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
> +	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
> +
> +	if (!pt->is_compact) {
> +		vaddr = px_vaddr(pd);
> +		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
> +		pt->is_compact = true;
> +	}
> +
> +	vaddr = px_vaddr(pt);
> +	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
> +}
> +
> +static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
> +				       dma_addr_t addr,
> +				       u64 offset,
> +				       enum i915_cache_level level,
> +				       u32 flags)
> +{
> +	if (flags & PTE_LM)
> +		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
> +						       level, flags);
> +
> +	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
Matt,

Is this call to gen8_*** for the insertion of smem PTE entries on
64K capable platforms like DG2?

Ram

> +}
> +
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
>  	u32 pte_flags;
> @@ -937,7 +980,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>  
>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> -	ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
> +	if (HAS_64K_PAGES(gt->i915))
> +		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
> +	else
> +		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
>  	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>  	ppgtt->vm.clear_range = gen8_ppgtt_clear;
>  	ppgtt->vm.foreach = gen8_ppgtt_foreach;
> -- 
> 2.31.1
>

Matthew Auld Dec. 3, 2021, 5:31 p.m. UTC | #2

On 03/12/2021 16:59, Ramalingam C wrote:
> On 2021-12-03 at 12:24:20 +0000, Matthew Auld wrote:
>> If this is LMEM then we get a 32 entry PT, with each PTE pointing to
>> some 64K block of memory, otherwise it's just the usual 512 entry PT.
>> This very much assumes the caller knows what they are doing.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Cc: Ramalingam C <ramalingam.c@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 50 ++++++++++++++++++++++++++--
>>   1 file changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index bd3ca0996a23..312b2267bf87 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -728,13 +728,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>>   		gen8_pdp_for_page_index(vm, idx);
>>   	struct i915_page_directory *pd =
>>   		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>> +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
>>   	gen8_pte_t *vaddr;
>>   
>> -	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>> +	GEM_BUG_ON(pt->is_compact);
> 
> Do we have compact PT for smem with 64k pages?

It's technically possible but we don't bother trying to support it in 
the driver.

> 
>> +
>> +	vaddr = px_vaddr(pt);
>>   	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>>   	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
>>   }
>>   
>> +static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
>> +					    dma_addr_t addr,
>> +					    u64 offset,
>> +					    enum i915_cache_level level,
>> +					    u32 flags)
>> +{
>> +	u64 idx = offset >> GEN8_PTE_SHIFT;
>> +	struct i915_page_directory * const pdp =
>> +		gen8_pdp_for_page_index(vm, idx);
>> +	struct i915_page_directory *pd =
>> +		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>> +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
>> +	gen8_pte_t *vaddr;
>> +
>> +	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
>> +	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
>> +
>> +	if (!pt->is_compact) {
>> +		vaddr = px_vaddr(pd);
>> +		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
>> +		pt->is_compact = true;
>> +	}
>> +
>> +	vaddr = px_vaddr(pt);
>> +	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
>> +}
>> +
>> +static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
>> +				       dma_addr_t addr,
>> +				       u64 offset,
>> +				       enum i915_cache_level level,
>> +				       u32 flags)
>> +{
>> +	if (flags & PTE_LM)
>> +		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
>> +						       level, flags);
>> +
>> +	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
> Matt,
> 
> Is this call to gen8_*** for the insertion of smem PTE entries on
> 64K capable platforms like DG2?

Yeah, this just falls back to the generic 512 entry layout for the PT.

> 
> Ram
> 
>> +}
>> +
>>   static int gen8_init_scratch(struct i915_address_space *vm)
>>   {
>>   	u32 pte_flags;
>> @@ -937,7 +980,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>   
>>   	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>> -	ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
>> +	if (HAS_64K_PAGES(gt->i915))
>> +		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
>> +	else
>> +		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
>>   	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>>   	ppgtt->vm.clear_range = gen8_ppgtt_clear;
>>   	ppgtt->vm.foreach = gen8_ppgtt_foreach;
>> -- 
>> 2.31.1
>>

Ramalingam C Dec. 3, 2021, 5:45 p.m. UTC | #3

On 2021-12-03 at 17:31:11 +0000, Matthew Auld wrote:
> On 03/12/2021 16:59, Ramalingam C wrote:
> > On 2021-12-03 at 12:24:20 +0000, Matthew Auld wrote:
> > > If this is LMEM then we get a 32 entry PT, with each PTE pointing to
> > > some 64K block of memory, otherwise it's just the usual 512 entry PT.
> > > This very much assumes the caller knows what they are doing.
> > > 
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Cc: Ramalingam C <ramalingam.c@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 50 ++++++++++++++++++++++++++--
> > >   1 file changed, 48 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> > > index bd3ca0996a23..312b2267bf87 100644
> > > --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> > > +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> > > @@ -728,13 +728,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
> > >   		gen8_pdp_for_page_index(vm, idx);
> > >   	struct i915_page_directory *pd =
> > >   		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> > > +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
> > >   	gen8_pte_t *vaddr;
> > > -	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
> > > +	GEM_BUG_ON(pt->is_compact);
> > 
> > Do we have compact PT for smem with 64k pages?
> 
> It's technically possible but we don't bother trying to support it in the
> driver.
Ok.

Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
> 
> > 
> > > +
> > > +	vaddr = px_vaddr(pt);
> > >   	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
> > >   	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
> > >   }
> > > +static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
> > > +					    dma_addr_t addr,
> > > +					    u64 offset,
> > > +					    enum i915_cache_level level,
> > > +					    u32 flags)
> > > +{
> > > +	u64 idx = offset >> GEN8_PTE_SHIFT;
> > > +	struct i915_page_directory * const pdp =
> > > +		gen8_pdp_for_page_index(vm, idx);
> > > +	struct i915_page_directory *pd =
> > > +		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> > > +	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
> > > +	gen8_pte_t *vaddr;
> > > +
> > > +	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
> > > +	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
> > > +
> > > +	if (!pt->is_compact) {
> > > +		vaddr = px_vaddr(pd);
> > > +		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
> > > +		pt->is_compact = true;
> > > +	}
> > > +
> > > +	vaddr = px_vaddr(pt);
> > > +	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
> > > +}
> > > +
> > > +static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
> > > +				       dma_addr_t addr,
> > > +				       u64 offset,
> > > +				       enum i915_cache_level level,
> > > +				       u32 flags)
> > > +{
> > > +	if (flags & PTE_LM)
> > > +		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
> > > +						       level, flags);
> > > +
> > > +	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
> > Matt,
> > 
> > Is this call to gen8_*** for the insertion of smem PTE entries on
> > 64K capable platforms like DG2?
> 
> Yeah, this just falls back to the generic 512 entry layout for the PT.
> 
> > 
> > Ram
> > 
> > > +}
> > > +
> > >   static int gen8_init_scratch(struct i915_address_space *vm)
> > >   {
> > >   	u32 pte_flags;
> > > @@ -937,7 +980,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
> > >   	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
> > >   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> > > -	ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
> > > +	if (HAS_64K_PAGES(gt->i915))
> > > +		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
> > > +	else
> > > +		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
> > >   	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
> > >   	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> > >   	ppgtt->vm.foreach = gen8_ppgtt_foreach;
> > > -- 
> > > 2.31.1
> > >

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index bd3ca0996a23..312b2267bf87 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -728,13 +728,56 @@  static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 		gen8_pdp_for_page_index(vm, idx);
 	struct i915_page_directory *pd =
 		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
 	gen8_pte_t *vaddr;
 
-	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+	GEM_BUG_ON(pt->is_compact);
+
+	vaddr = px_vaddr(pt);
 	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
 	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
+static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
+					    dma_addr_t addr,
+					    u64 offset,
+					    enum i915_cache_level level,
+					    u32 flags)
+{
+	u64 idx = offset >> GEN8_PTE_SHIFT;
+	struct i915_page_directory * const pdp =
+		gen8_pdp_for_page_index(vm, idx);
+	struct i915_page_directory *pd =
+		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
+	gen8_pte_t *vaddr;
+
+	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
+	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
+
+	if (!pt->is_compact) {
+		vaddr = px_vaddr(pd);
+		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
+		pt->is_compact = true;
+	}
+
+	vaddr = px_vaddr(pt);
+	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+}
+
+static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
+				       dma_addr_t addr,
+				       u64 offset,
+				       enum i915_cache_level level,
+				       u32 flags)
+{
+	if (flags & PTE_LM)
+		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
+						       level, flags);
+
+	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
+}
+
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
 	u32 pte_flags;
@@ -937,7 +980,10 @@  struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
-	ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
+	if (HAS_64K_PAGES(gt->i915))
+		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
+	else
+		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 	ppgtt->vm.foreach = gen8_ppgtt_foreach;

[v2,2/8] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry

Commit Message

Comments

Patch