diff mbox series

[v3,3/3] drm/i915/gtt/dg1: add PTE_LM plumbing for GGTT

Message ID 20210203152333.511453-3-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/3] drm/i915: Distinction of memory regions | expand

Commit Message

Matthew Auld Feb. 3, 2021, 3:23 p.m. UTC
For the PTEs we get an LM bit, to signal whether the page resides in
SMEM or LMEM.

Based on a patch from Michel Thierry.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
 2 files changed, 22 insertions(+), 6 deletions(-)

Comments

Tang, CQ Feb. 3, 2021, 4:51 p.m. UTC | #1
> -----Original Message-----
> From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Auld
> Sent: Wednesday, February 3, 2021 7:24 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Subject: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM plumbing
> for GGTT
> 
> For the PTEs we get an LM bit, to signal whether the page resides in SMEM or
> LMEM.
> 
> Based on a patch from Michel Thierry.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
> drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
>  2 files changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index fc399ac16eda..b0b8ded834f0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -10,6 +10,8 @@
> 
>  #include <drm/i915_drm.h>
> 
> +#include "gem/i915_gem_lmem.h"
> +
>  #include "intel_gt.h"
>  #include "i915_drv.h"
>  #include "i915_scatterlist.h"
> @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  				enum i915_cache_level level,
>  				u32 flags)
>  {
> -	return addr | _PAGE_PRESENT;
> +	gen8_pte_t pte = addr | _PAGE_PRESENT;
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	return pte;
>  }
> 
>  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@ -201,13
> +208,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space
> *vm,
>  				  dma_addr_t addr,
>  				  u64 offset,
>  				  enum i915_cache_level level,
> -				  u32 unused)
> +				  u32 flags)
>  {
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	gen8_pte_t __iomem *pte =
>  		(gen8_pte_t __iomem *)ggtt->gsm + offset /
> I915_GTT_PAGE_SIZE;
> 
> -	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
> +	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> 
>  	ggtt->invalidate(ggtt);
>  }
> @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct
> i915_address_space *vm,
>  				     enum i915_cache_level level,
>  				     u32 flags)
>  {
> -	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
> +	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level,
> flags);
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	gen8_pte_t __iomem *gte;
>  	gen8_pte_t __iomem *end;
> @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct i915_address_space
> *vm,
>  	pte_flags = 0;
>  	if (i915_gem_object_is_readonly(obj))
>  		pte_flags |= PTE_READ_ONLY;
> +	if (i915_gem_object_is_lmem(obj))
> +		pte_flags |= PTE_LM;
> 
>  	vm->insert_entries(vm, vma, cache_level, pte_flags);
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -794,6 +803,7 @@
> static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>  	struct drm_i915_private *i915 = ggtt->vm.i915;
>  	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>  	phys_addr_t phys_addr;
> +	u32 pte_flags;
>  	int ret;
> 
>  	/* For Modern GENs the PTEs and register space are split in the BAR
> */ @@ -823,9 +833,13 @@ static int ggtt_probe_common(struct i915_ggtt
> *ggtt, u64 size)
>  		return ret;
>  	}
> 
> +	pte_flags = 0;
> +	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
> +		pte_flags |= PTE_LM;
> +
>  	ggtt->vm.scratch[0]->encode =
>  		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> -				    I915_CACHE_NONE, 0);
> +				    I915_CACHE_NONE, pte_flags);
> 
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 0eef625dd787..24b5808df16d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -85,7 +85,9 @@ typedef u64 gen8_pte_t;
>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
> 
> -#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
> +#define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> +
> +#define GEN12_GGTT_PTE_LM	BIT_ULL(1)

Where does the Bspec say bit-1 is for LMEM?

--CQ

> 
>  /*
>   * Cacheability Control is a 4-bit value. The low three bits are stored in bits
> --
> 2.26.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Matthew Auld Feb. 3, 2021, 5:02 p.m. UTC | #2
On Wed, 3 Feb 2021 at 16:51, Tang, CQ <cq.tang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf Of
> > Matthew Auld
> > Sent: Wednesday, February 3, 2021 7:24 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Subject: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM plumbing
> > for GGTT
> >
> > For the PTEs we get an LM bit, to signal whether the page resides in SMEM or
> > LMEM.
> >
> > Based on a patch from Michel Thierry.
> >
> > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
> > drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
> >  2 files changed, 22 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index fc399ac16eda..b0b8ded834f0 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -10,6 +10,8 @@
> >
> >  #include <drm/i915_drm.h>
> >
> > +#include "gem/i915_gem_lmem.h"
> > +
> >  #include "intel_gt.h"
> >  #include "i915_drv.h"
> >  #include "i915_scatterlist.h"
> > @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >                               enum i915_cache_level level,
> >                               u32 flags)
> >  {
> > -     return addr | _PAGE_PRESENT;
> > +     gen8_pte_t pte = addr | _PAGE_PRESENT;
> > +
> > +     if (flags & PTE_LM)
> > +             pte |= GEN12_GGTT_PTE_LM;
> > +
> > +     return pte;
> >  }
> >
> >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@ -201,13
> > +208,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space
> > *vm,
> >                                 dma_addr_t addr,
> >                                 u64 offset,
> >                                 enum i915_cache_level level,
> > -                               u32 unused)
> > +                               u32 flags)
> >  {
> >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> >       gen8_pte_t __iomem *pte =
> >               (gen8_pte_t __iomem *)ggtt->gsm + offset /
> > I915_GTT_PAGE_SIZE;
> >
> > -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
> > +     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> >
> >       ggtt->invalidate(ggtt);
> >  }
> > @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct
> > i915_address_space *vm,
> >                                    enum i915_cache_level level,
> >                                    u32 flags)
> >  {
> > -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
> > +     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level,
> > flags);
> >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> >       gen8_pte_t __iomem *gte;
> >       gen8_pte_t __iomem *end;
> > @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct i915_address_space
> > *vm,
> >       pte_flags = 0;
> >       if (i915_gem_object_is_readonly(obj))
> >               pte_flags |= PTE_READ_ONLY;
> > +     if (i915_gem_object_is_lmem(obj))
> > +             pte_flags |= PTE_LM;
> >
> >       vm->insert_entries(vm, vma, cache_level, pte_flags);
> >       vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -794,6 +803,7 @@
> > static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
> >       struct drm_i915_private *i915 = ggtt->vm.i915;
> >       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> >       phys_addr_t phys_addr;
> > +     u32 pte_flags;
> >       int ret;
> >
> >       /* For Modern GENs the PTEs and register space are split in the BAR
> > */ @@ -823,9 +833,13 @@ static int ggtt_probe_common(struct i915_ggtt
> > *ggtt, u64 size)
> >               return ret;
> >       }
> >
> > +     pte_flags = 0;
> > +     if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
> > +             pte_flags |= PTE_LM;
> > +
> >       ggtt->vm.scratch[0]->encode =
> >               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> > -                                 I915_CACHE_NONE, 0);
> > +                                 I915_CACHE_NONE, pte_flags);
> >
> >       return 0;
> >  }
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > index 0eef625dd787..24b5808df16d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > @@ -85,7 +85,9 @@ typedef u64 gen8_pte_t;
> >  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
> >  #define BYT_PTE_WRITEABLE            REG_BIT(1)
> >
> > -#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
> > +#define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
> > +
> > +#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
>
> Where does the Bspec say bit-1 is for LMEM?

Bspec: 45015 <- GGTT
Bspec: 45040 <- ppGTT

I'll update the commit messages.

>
> --CQ
>
> >
> >  /*
> >   * Cacheability Control is a 4-bit value. The low three bits are stored in bits
> > --
> > 2.26.2
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tang, CQ Feb. 3, 2021, 6:01 p.m. UTC | #3
> -----Original Message-----
> From: Matthew Auld <matthew.william.auld@gmail.com>
> Sent: Wednesday, February 3, 2021 9:03 AM
> To: Tang, CQ <cq.tang@intel.com>
> Cc: Auld, Matthew <matthew.auld@intel.com>; intel-
> gfx@lists.freedesktop.org; Chris Wilson <chris@chris-wilson.co.uk>
> Subject: Re: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> plumbing for GGTT
> 
> On Wed, 3 Feb 2021 at 16:51, Tang, CQ <cq.tang@intel.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf
> > > Of Matthew Auld
> > > Sent: Wednesday, February 3, 2021 7:24 AM
> > > To: intel-gfx@lists.freedesktop.org
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Subject: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> > > plumbing for GGTT
> > >
> > > For the PTEs we get an LM bit, to signal whether the page resides in
> > > SMEM or LMEM.
> > >
> > > Based on a patch from Michel Thierry.
> > >
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > > Signed-off-by: Daniele Ceraolo Spurio
> > > <daniele.ceraolospurio@intel.com>
> > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
> > > drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
> > >  2 files changed, 22 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > index fc399ac16eda..b0b8ded834f0 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > @@ -10,6 +10,8 @@
> > >
> > >  #include <drm/i915_drm.h>
> > >
> > > +#include "gem/i915_gem_lmem.h"
> > > +
> > >  #include "intel_gt.h"
> > >  #include "i915_drv.h"
> > >  #include "i915_scatterlist.h"
> > > @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t
> addr,
> > >                               enum i915_cache_level level,
> > >                               u32 flags)  {
> > > -     return addr | _PAGE_PRESENT;
> > > +     gen8_pte_t pte = addr | _PAGE_PRESENT;
> > > +
> > > +     if (flags & PTE_LM)
> > > +             pte |= GEN12_GGTT_PTE_LM;
> > > +
> > > +     return pte;
> > >  }
> > >
> > >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@
> > > -201,13
> > > +208,13 @@ static void gen8_ggtt_insert_page(struct
> > > +i915_address_space
> > > *vm,
> > >                                 dma_addr_t addr,
> > >                                 u64 offset,
> > >                                 enum i915_cache_level level,
> > > -                               u32 unused)
> > > +                               u32 flags)
> > >  {
> > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > >       gen8_pte_t __iomem *pte =
> > >               (gen8_pte_t __iomem *)ggtt->gsm + offset /
> > > I915_GTT_PAGE_SIZE;
> > >
> > > -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
> > > +     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> > >
> > >       ggtt->invalidate(ggtt);
> > >  }
> > > @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct
> > > i915_address_space *vm,
> > >                                    enum i915_cache_level level,
> > >                                    u32 flags)  {
> > > -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
> > > +     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level,
> > > flags);
> > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > >       gen8_pte_t __iomem *gte;
> > >       gen8_pte_t __iomem *end;
> > > @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct
> > > i915_address_space *vm,
> > >       pte_flags = 0;
> > >       if (i915_gem_object_is_readonly(obj))
> > >               pte_flags |= PTE_READ_ONLY;
> > > +     if (i915_gem_object_is_lmem(obj))
> > > +             pte_flags |= PTE_LM;
> > >
> > >       vm->insert_entries(vm, vma, cache_level, pte_flags);
> > >       vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -794,6 +803,7 @@
> > > static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
> > >       struct drm_i915_private *i915 = ggtt->vm.i915;
> > >       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> > >       phys_addr_t phys_addr;
> > > +     u32 pte_flags;
> > >       int ret;
> > >
> > >       /* For Modern GENs the PTEs and register space are split in
> > > the BAR */ @@ -823,9 +833,13 @@ static int ggtt_probe_common(struct
> > > i915_ggtt *ggtt, u64 size)
> > >               return ret;
> > >       }
> > >
> > > +     pte_flags = 0;
> > > +     if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
> > > +             pte_flags |= PTE_LM;
> > > +
> > >       ggtt->vm.scratch[0]->encode =
> > >               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> > > -                                 I915_CACHE_NONE, 0);
> > > +                                 I915_CACHE_NONE, pte_flags);
> > >
> > >       return 0;
> > >  }
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > index 0eef625dd787..24b5808df16d 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > @@ -85,7 +85,9 @@ typedef u64 gen8_pte_t;
> > >  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
> > >  #define BYT_PTE_WRITEABLE            REG_BIT(1)
> > >
> > > -#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
> > > +#define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
> > > +
> > > +#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
> >
> > Where does the Bspec say bit-1 is for LMEM?
> 
> Bspec: 45015 <- GGTT
> Bspec: 45040 <- ppGTT

I looked at both documents, and I don't see bit-1 used as the DM indicator; it is the R/W bit. I also see that bit-11 is ignored.
The only place I see bit-11 as DM is Bspec 53521.

--CQ


> 
> I'll update the commit messages.
> 
> >
> > --CQ
> >
> > >
> > >  /*
> > >   * Cacheability Control is a 4-bit value. The low three bits are
> > > stored in bits
> > > --
> > > 2.26.2
> > >
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Matthew Auld Feb. 3, 2021, 6:32 p.m. UTC | #4
On Wed, 3 Feb 2021 at 18:01, Tang, CQ <cq.tang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Matthew Auld <matthew.william.auld@gmail.com>
> > Sent: Wednesday, February 3, 2021 9:03 AM
> > To: Tang, CQ <cq.tang@intel.com>
> > Cc: Auld, Matthew <matthew.auld@intel.com>; intel-
> > gfx@lists.freedesktop.org; Chris Wilson <chris@chris-wilson.co.uk>
> > Subject: Re: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> > plumbing for GGTT
> >
> > On Wed, 3 Feb 2021 at 16:51, Tang, CQ <cq.tang@intel.com> wrote:
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf
> > > > Of Matthew Auld
> > > > Sent: Wednesday, February 3, 2021 7:24 AM
> > > > To: intel-gfx@lists.freedesktop.org
> > > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Subject: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> > > > plumbing for GGTT
> > > >
> > > > For the PTEs we get an LM bit, to signal whether the page resides in
> > > > SMEM or LMEM.
> > > >
> > > > Based on a patch from Michel Thierry.
> > > >
> > > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > > > Signed-off-by: Daniele Ceraolo Spurio
> > > > <daniele.ceraolospurio@intel.com>
> > > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
> > > > drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
> > > >  2 files changed, 22 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > index fc399ac16eda..b0b8ded834f0 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > @@ -10,6 +10,8 @@
> > > >
> > > >  #include <drm/i915_drm.h>
> > > >
> > > > +#include "gem/i915_gem_lmem.h"
> > > > +
> > > >  #include "intel_gt.h"
> > > >  #include "i915_drv.h"
> > > >  #include "i915_scatterlist.h"
> > > > @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t
> > addr,
> > > >                               enum i915_cache_level level,
> > > >                               u32 flags)  {
> > > > -     return addr | _PAGE_PRESENT;
> > > > +     gen8_pte_t pte = addr | _PAGE_PRESENT;
> > > > +
> > > > +     if (flags & PTE_LM)
> > > > +             pte |= GEN12_GGTT_PTE_LM;
> > > > +
> > > > +     return pte;
> > > >  }
> > > >
> > > >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@
> > > > -201,13
> > > > +208,13 @@ static void gen8_ggtt_insert_page(struct
> > > > +i915_address_space
> > > > *vm,
> > > >                                 dma_addr_t addr,
> > > >                                 u64 offset,
> > > >                                 enum i915_cache_level level,
> > > > -                               u32 unused)
> > > > +                               u32 flags)
> > > >  {
> > > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > > >       gen8_pte_t __iomem *pte =
> > > >               (gen8_pte_t __iomem *)ggtt->gsm + offset /
> > > > I915_GTT_PAGE_SIZE;
> > > >
> > > > -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
> > > > +     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> > > >
> > > >       ggtt->invalidate(ggtt);
> > > >  }
> > > > @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct
> > > > i915_address_space *vm,
> > > >                                    enum i915_cache_level level,
> > > >                                    u32 flags)  {
> > > > -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
> > > > +     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level,
> > > > flags);
> > > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > > >       gen8_pte_t __iomem *gte;
> > > >       gen8_pte_t __iomem *end;
> > > > @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct
> > > > i915_address_space *vm,
> > > >       pte_flags = 0;
> > > >       if (i915_gem_object_is_readonly(obj))
> > > >               pte_flags |= PTE_READ_ONLY;
> > > > +     if (i915_gem_object_is_lmem(obj))
> > > > +             pte_flags |= PTE_LM;
> > > >
> > > >       vm->insert_entries(vm, vma, cache_level, pte_flags);
> > > >       vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -794,6 +803,7 @@
> > > > static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
> > > >       struct drm_i915_private *i915 = ggtt->vm.i915;
> > > >       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> > > >       phys_addr_t phys_addr;
> > > > +     u32 pte_flags;
> > > >       int ret;
> > > >
> > > >       /* For Modern GENs the PTEs and register space are split in
> > > > the BAR */ @@ -823,9 +833,13 @@ static int ggtt_probe_common(struct
> > > > i915_ggtt *ggtt, u64 size)
> > > >               return ret;
> > > >       }
> > > >
> > > > +     pte_flags = 0;
> > > > +     if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
> > > > +             pte_flags |= PTE_LM;
> > > > +
> > > >       ggtt->vm.scratch[0]->encode =
> > > >               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> > > > -                                 I915_CACHE_NONE, 0);
> > > > +                                 I915_CACHE_NONE, pte_flags);
> > > >
> > > >       return 0;
> > > >  }
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > index 0eef625dd787..24b5808df16d 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > @@ -85,7 +85,9 @@ typedef u64 gen8_pte_t;
> > > >  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
> > > >  #define BYT_PTE_WRITEABLE            REG_BIT(1)
> > > >
> > > > -#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
> > > > +#define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
> > > > +
> > > > +#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
> > >
> > > Where does the Bspec say bit-1 is for LMEM?
> >
> > Bspec: 45015 <- GGTT
> > Bspec: 45040 <- ppGTT
>
> I looked both document, I don't see bit-1 is used as DM indicator, it is R/W bit. I also see bit-11 is ignored.
> The only place I see bit-11 as DM is Bspec 53521

I see bit-1 as "Local Memory" in 45015, and bit-11 as "Local Memory"
in 45040. Also there is no R/W bit for the GGTT on DG1, so not sure
what version of the spec you are looking at. Just to be clear there
are two different bits here, one for the ppGTT and one for the GGTT,
they both have different layouts for their respective PTEs.

Bspec: 53521 is not applicable to DG1.

>
> --CQ
>
>
> >
> > I'll update the commit messages.
> >
> > >
> > > --CQ
> > >
> > > >
> > > >  /*
> > > >   * Cacheability Control is a 4-bit value. The low three bits are
> > > > stored in bits
> > > > --
> > > > 2.26.2
> > > >
> > > > _______________________________________________
> > > > Intel-gfx mailing list
> > > > Intel-gfx@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Piotr Piórkowski Feb. 4, 2021, 5:43 p.m. UTC | #5
Matthew Auld <matthew.william.auld@gmail.com> wrote on śro [2021-lut-03 18:32:34 +0000]:
> On Wed, 3 Feb 2021 at 18:01, Tang, CQ <cq.tang@intel.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Matthew Auld <matthew.william.auld@gmail.com>
> > > Sent: Wednesday, February 3, 2021 9:03 AM
> > > To: Tang, CQ <cq.tang@intel.com>
> > > Cc: Auld, Matthew <matthew.auld@intel.com>; intel-
> > > gfx@lists.freedesktop.org; Chris Wilson <chris@chris-wilson.co.uk>
> > > Subject: Re: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> > > plumbing for GGTT
> > >
> > > On Wed, 3 Feb 2021 at 16:51, Tang, CQ <cq.tang@intel.com> wrote:
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf
> > > > > Of Matthew Auld
> > > > > Sent: Wednesday, February 3, 2021 7:24 AM
> > > > > To: intel-gfx@lists.freedesktop.org
> > > > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > Subject: [Intel-gfx] [PATCH v3 3/3] drm/i915/gtt/dg1: add PTE_LM
> > > > > plumbing for GGTT
> > > > >
> > > > > For the PTEs we get an LM bit, to signal whether the page resides in
> > > > > SMEM or LMEM.
> > > > >
> > > > > Based on a patch from Michel Thierry.
> > > > >
> > > > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > > > > Signed-off-by: Daniele Ceraolo Spurio
> > > > > <daniele.ceraolospurio@intel.com>
> > > > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gt/intel_ggtt.c | 24 +++++++++++++++++++-----
> > > > > drivers/gpu/drm/i915/gt/intel_gtt.h  |  4 +++-
> > > > >  2 files changed, 22 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > > b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > > index fc399ac16eda..b0b8ded834f0 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > > @@ -10,6 +10,8 @@
> > > > >
> > > > >  #include <drm/i915_drm.h>
> > > > >
> > > > > +#include "gem/i915_gem_lmem.h"
> > > > > +
> > > > >  #include "intel_gt.h"
> > > > >  #include "i915_drv.h"
> > > > >  #include "i915_scatterlist.h"
> > > > > @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t
> > > addr,
> > > > >                               enum i915_cache_level level,
> > > > >                               u32 flags)  {
> > > > > -     return addr | _PAGE_PRESENT;
> > > > > +     gen8_pte_t pte = addr | _PAGE_PRESENT;
> > > > > +
> > > > > +     if (flags & PTE_LM)
> > > > > +             pte |= GEN12_GGTT_PTE_LM;
> > > > > +
> > > > > +     return pte;
> > > > >  }
> > > > >
> > > > >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@
> > > > > -201,13
> > > > > +208,13 @@ static void gen8_ggtt_insert_page(struct
> > > > > +i915_address_space
> > > > > *vm,
> > > > >                                 dma_addr_t addr,
> > > > >                                 u64 offset,
> > > > >                                 enum i915_cache_level level,
> > > > > -                               u32 unused)
> > > > > +                               u32 flags)
> > > > >  {
> > > > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > > > >       gen8_pte_t __iomem *pte =
> > > > >               (gen8_pte_t __iomem *)ggtt->gsm + offset /
> > > > > I915_GTT_PAGE_SIZE;
> > > > >
> > > > > -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
> > > > > +     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> > > > >
> > > > >       ggtt->invalidate(ggtt);
> > > > >  }
> > > > > @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct
> > > > > i915_address_space *vm,
> > > > >                                    enum i915_cache_level level,
> > > > >                                    u32 flags)  {
> > > > > -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
> > > > > +     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level,
> > > > > flags);
> > > > >       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > > > >       gen8_pte_t __iomem *gte;
> > > > >       gen8_pte_t __iomem *end;
> > > > > @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct
> > > > > i915_address_space *vm,
> > > > >       pte_flags = 0;
> > > > >       if (i915_gem_object_is_readonly(obj))
> > > > >               pte_flags |= PTE_READ_ONLY;
> > > > > +     if (i915_gem_object_is_lmem(obj))
> > > > > +             pte_flags |= PTE_LM;
> > > > >
> > > > >       vm->insert_entries(vm, vma, cache_level, pte_flags);
> > > > >       vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -794,6 +803,7 @@
> > > > > static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
> > > > >       struct drm_i915_private *i915 = ggtt->vm.i915;
> > > > >       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> > > > >       phys_addr_t phys_addr;
> > > > > +     u32 pte_flags;
> > > > >       int ret;
> > > > >
> > > > >       /* For Modern GENs the PTEs and register space are split in
> > > > > the BAR */ @@ -823,9 +833,13 @@ static int ggtt_probe_common(struct
> > > > > i915_ggtt *ggtt, u64 size)
> > > > >               return ret;
> > > > >       }
> > > > >
> > > > > +     pte_flags = 0;
> > > > > +     if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
> > > > > +             pte_flags |= PTE_LM;
> > > > > +
> > > > >       ggtt->vm.scratch[0]->encode =
> > > > >               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> > > > > -                                 I915_CACHE_NONE, 0);
> > > > > +                                 I915_CACHE_NONE, pte_flags);
> > > > >
> > > > >       return 0;
> > > > >  }
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > > b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > > index 0eef625dd787..24b5808df16d 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> > > > > @@ -85,7 +85,9 @@ typedef u64 gen8_pte_t;
> > > > >  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
> > > > >  #define BYT_PTE_WRITEABLE            REG_BIT(1)
> > > > >
> > > > > -#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
> > > > > +#define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
> > > > > +
> > > > > +#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
> > > >
> > > > Where does the Bspec say bit-1 is for LMEM?
> > >
> > > Bspec: 45015 <- GGTT
> > > Bspec: 45040 <- ppGTT
> >
> > I looked both document, I don't see bit-1 is used as DM indicator, it is R/W bit. I also see bit-11 is ignored.
> > The only place I see bit-11 as DM is Bspec 53521
> 
> I see bit-1 as "Local Memory" in 45015, and bit-11 as "Local Memory"
> in 45040. Also there is no R/W bit for the GGTT on DG1, so not sure
> what version of the spec you are looking at. Just to be clear there
> are two different bits here, one for the ppGTT and one for the GGTT,
> they both have different layouts for their respective PTEs.
> 
> Bspec: 53521 is not applicable to DG1.

Matthew is right. Bspec 45015 is applicable here for GGTT.
I'm currently working on an internal series where I'm cleaning this area of code.

Based on 45015 I prepared this table:

/*
 * GEN12 GGTT Table Entry format:
 *   63:54 | 53:52 |   51:46 |   45:12 |    11:2 |  1 |       0
 * ignored |   PAT | ignored | address | ignored | LM | present
 */


Piotr

> 
> >
> > --CQ
> >
> >
> > >
> > > I'll update the commit messages.
> > >
> > > >
> > > > --CQ
> > > >
> > > > >
> > > > >  /*
> > > > >   * Cacheability Control is a 4-bit value. The low three bits are
> > > > > stored in bits
> > > > > --
> > > > > 2.26.2
> > > > >
> > > > > _______________________________________________
> > > > > Intel-gfx mailing list
> > > > > Intel-gfx@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > > _______________________________________________
> > > > Intel-gfx mailing list
> > > > Intel-gfx@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index fc399ac16eda..b0b8ded834f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -10,6 +10,8 @@ 
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_lmem.h"
+
 #include "intel_gt.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -189,7 +191,12 @@  static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 				enum i915_cache_level level,
 				u32 flags)
 {
-	return addr | _PAGE_PRESENT;
+	gen8_pte_t pte = addr | _PAGE_PRESENT;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	return pte;
 }
 
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
@@ -201,13 +208,13 @@  static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
 				  u64 offset,
 				  enum i915_cache_level level,
-				  u32 unused)
+				  u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
+	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
 
 	ggtt->invalidate(ggtt);
 }
@@ -217,7 +224,7 @@  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
+	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	gen8_pte_t __iomem *gte;
 	gen8_pte_t __iomem *end;
@@ -459,6 +466,8 @@  static void ggtt_bind_vma(struct i915_address_space *vm,
 	pte_flags = 0;
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
+	if (i915_gem_object_is_lmem(obj))
+		pte_flags |= PTE_LM;
 
 	vm->insert_entries(vm, vma, cache_level, pte_flags);
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -794,6 +803,7 @@  static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	phys_addr_t phys_addr;
+	u32 pte_flags;
 	int ret;
 
 	/* For Modern GENs the PTEs and register space are split in the BAR */
@@ -823,9 +833,13 @@  static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 		return ret;
 	}
 
+	pte_flags = 0;
+	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
+		pte_flags |= PTE_LM;
+
 	ggtt->vm.scratch[0]->encode =
 		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
-				    I915_CACHE_NONE, 0);
+				    I915_CACHE_NONE, pte_flags);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 0eef625dd787..24b5808df16d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -85,7 +85,9 @@  typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
-#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
+#define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
+
+#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
 
 /*
  * Cacheability Control is a 4-bit value. The low three bits are stored in bits