diff mbox series

[1/1] drm/i915/mtl: enable local stolen memory

Message ID 20220920071940.3775059-1-aravind.iddamsetty@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/1] drm/i915/mtl: enable local stolen memory | expand

Commit Message

Iddamsetty, Aravind Sept. 20, 2022, 7:19 a.m. UTC
As an integrated GPU, MTL does not have local memory and
HAS_LMEM() returns false.  However the platform's stolen memory
is presented via BAR2 (i.e., the BAR we traditionally consider
to be the LMEM BAR) and should be managed by the driver the same
way that local memory is on dgpu platforms (which includes
setting the "lmem" bit on page table entries).  We use the term
"local stolen memory" to refer to this model.

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>

Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Original-author: CQ Tang
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
 drivers/gpu/drm/i915/i915_drv.h            |   3 +
 3 files changed, 100 insertions(+), 18 deletions(-)

Comments

Jani Nikula Sept. 20, 2022, 7:35 a.m. UTC | #1
On Tue, 20 Sep 2022, Aravind Iddamsetty <aravind.iddamsetty@intel.com> wrote:
> As an integrated GPU, MTL does not have local memory and
> HAS_LMEM() returns false.  However the platform's stolen memory
> is presented via BAR2 (i.e., the BAR we traditionally consider
> to be the LMEM BAR) and should be managed by the driver the same
> way that local memory is on dgpu platforms (which includes
> setting the "lmem" bit on page table entries).  We use the term
> "local stolen memory" to refer to this model.
>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>
> Signed-off-by: CQ Tang <cq.tang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> Original-author: CQ Tang
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>  drivers/gpu/drm/i915/i915_drv.h            |   3 +
>  3 files changed, 100 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index acc561c0f0aa..bad5250fb764 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
>  	mutex_unlock(&i915->mm.stolen_lock);
>  }
>  
> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)

Abstracting this as a separate function looks like a separate patch.

I generally recommend using positive naming, "is dsm valid", to avoid
any double negatives that might pop up, now or in the
future. !is_dsm_invalid() gets slower for human brains to parse.

BR,
Jani.


> +{
> +	if (!HAS_BAR2_SMEM_STOLEN(i915)) {
> +		if (dsm->start == 0)
> +			return true;
> +	}
> +
> +	if (dsm->end <= dsm->start)
> +		return true;
> +
> +	return false;
> +}
> +
>  static int i915_adjust_stolen(struct drm_i915_private *i915,
>  			      struct resource *dsm)
>  {
> @@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>  	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>  	struct resource *r;
>  
> -	if (dsm->start == 0 || dsm->end <= dsm->start)
> +	if (is_dsm_invalid(i915, dsm))
>  		return -EINVAL;
>  
>  	/*
> @@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>  	 * overlaps with the non-stolen system memory range, since lmem is local
>  	 * to the gpu.
>  	 */
> -	if (HAS_LMEM(i915))
> +	if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
>  		return 0;
>  
>  	/*
> @@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>  
>  	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
>  
> -	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
> -
>  	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
>  	case GEN8_STOLEN_RESERVED_1M:
>  		*size = 1024 * 1024;
> @@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>  		*size = 8 * 1024 * 1024;
>  		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
>  	}
> +
> +	if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
> +		/* the base is initialized to stolen top so subtract size to get base */
> +		*base -= *size;
> +	else
> +		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>  }
>  
>  static int i915_gem_init_stolen(struct intel_memory_region *mem)
> @@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
>  	if (i915_adjust_stolen(i915, &i915->dsm))
>  		return 0;
>  
> -	GEM_BUG_ON(i915->dsm.start == 0);
> -	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
> +	GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
>  
>  	stolen_top = i915->dsm.end + 1;
>  	reserved_base = stolen_top;
> @@ -796,6 +812,46 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
>  	.init_object = _i915_gem_object_stolen_init,
>  };
>  
> +static int get_mtl_gms_size(struct intel_uncore *uncore)
> +{
> +	u16 ggc, gms;
> +
> +	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
> +
> +	/* check GGMS, should be fixed 0x3 (8MB) */
> +	if ((ggc & 0xc0) != 0xc0)
> +		return -EIO;
> +
> +	/* return valid GMS value, -EIO if invalid */
> +	gms = ggc >> 8;
> +	switch (gms) {
> +	case 0x0 ... 0x10:
> +		return gms * 32;
> +	case 0x20:
> +		return 1024;
> +	case 0x30:
> +		return 1536;
> +	case 0x40:
> +		return 2048;
> +	case 0xf0 ... 0xfe:
> +		return (gms - 0xf0 + 1) * 4;
> +	default:
> +		return -EIO;
> +	}
> +}
> +
> +static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)
> +{
> +	u32 regions = RUNTIME_INFO(i915)->memory_regions;
> +
> +	if (regions & REGION_LMEM)
> +		return false;
> +
> +	drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
> +
> +	return true;
> +}
> +
>  struct intel_memory_region *
>  i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>  			   u16 instance)
> @@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>  	struct intel_memory_region *mem;
>  	resource_size_t io_start, io_size;
>  	resource_size_t min_page_size;
> +	int ret;
>  
>  	if (WARN_ON_ONCE(instance))
>  		return ERR_PTR(-ENODEV);
>  
> -	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
> +	if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))
>  		return ERR_PTR(-ENXIO);
>  
> -	/* Use DSM base address instead for stolen memory */
> -	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
> -	if (IS_DG1(uncore->i915)) {
> -		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
> -		if (WARN_ON(lmem_size < dsm_base))
> -			return ERR_PTR(-ENODEV);
> +	if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
> +		lmem_size = pci_resource_len(pdev, GFXMEM_BAR);
>  	} else {
>  		resource_size_t lmem_range;
>  
> @@ -827,13 +880,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>  		lmem_size *= SZ_1G;
>  	}
>  
> -	dsm_size = lmem_size - dsm_base;
> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> +	if (HAS_BAR2_SMEM_STOLEN(i915)) {
> +		/*
> +		 * MTL dsm size is in GGC register, not the bar size.
> +		 * also MTL uses offset to DSMBASE in ptes, so i915
> +		 * uses dsm_base = 0 to setup stolen region.
> +		 */
> +		ret = get_mtl_gms_size(uncore);
> +		if (ret < 0) {
> +			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
> +			return ERR_PTR(ret);
> +		}
> +
> +		dsm_base = 0;
> +		dsm_size = (resource_size_t)(ret * SZ_1M);
> +
> +		GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
> +		GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
> +	} else {
> +		/* Use DSM base address instead for stolen memory */
> +		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
> +		if (WARN_ON(lmem_size < dsm_base))
> +			return ERR_PTR(-ENODEV);
> +		dsm_size = lmem_size - dsm_base;
> +	}
> +
> +	io_size = dsm_size;
> +	if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
>  		io_start = 0;
>  		io_size = 0;
> +	} else if (HAS_BAR2_SMEM_STOLEN(i915)) {
> +		io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;
>  	} else {
> -		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
> -		io_size = dsm_size;
> +		io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
>  	}
>  
>  	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 30cf5c3369d9..b31fe0fb013f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -931,7 +931,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	unsigned int size;
>  	u16 snb_gmch_ctl;
>  
> -	if (!HAS_LMEM(i915)) {
> +	if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
>  		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
>  			return -ENXIO;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 134fc1621821..ef3120322077 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -973,6 +973,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  
>  #define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
>  
> +#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
> +				    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +
>  /* intel_device_info.c */
>  static inline struct intel_device_info *
>  mkwrite_device_info(struct drm_i915_private *dev_priv)
Lucas De Marchi Sept. 20, 2022, 8:31 a.m. UTC | #2
On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote:
>As an integrated GPU, MTL does not have local memory and
>HAS_LMEM() returns false.  However the platform's stolen memory
>is presented via BAR2 (i.e., the BAR we traditionally consider
>to be the LMEM BAR) and should be managed by the driver the same
>way that local memory is on dgpu platforms (which includes
>setting the "lmem" bit on page table entries).  We use the term
>"local stolen memory" to refer to this model.
>
>Cc: Matt Roper <matthew.d.roper@intel.com>
>Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>
>Signed-off-by: CQ Tang <cq.tang@intel.com>
>Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>Original-author: CQ Tang
>---
> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
> drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
> drivers/gpu/drm/i915/i915_drv.h            |   3 +
> 3 files changed, 100 insertions(+), 18 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>index acc561c0f0aa..bad5250fb764 100644
>--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>@@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
> 	mutex_unlock(&i915->mm.stolen_lock);
> }
>
>+static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)
>+{
>+	if (!HAS_BAR2_SMEM_STOLEN(i915)) {

I called a similar function as is_dsm_valid() in
https://patchwork.freedesktop.org/series/108620/

sounds weird  with "invalid" and the double negation on return early
style.

>+		if (dsm->start == 0)
>+			return true;
>+	}
>+
>+	if (dsm->end <= dsm->start)
>+		return true;
>+
>+	return false;
>+}
>+
> static int i915_adjust_stolen(struct drm_i915_private *i915,
> 			      struct resource *dsm)
> {
>@@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
> 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
> 	struct resource *r;
>
>-	if (dsm->start == 0 || dsm->end <= dsm->start)
>+	if (is_dsm_invalid(i915, dsm))
> 		return -EINVAL;
>
> 	/*
>@@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
> 	 * overlaps with the non-stolen system memory range, since lmem is local
> 	 * to the gpu.
> 	 */
>-	if (HAS_LMEM(i915))
>+	if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))

comment above makes no sense when you add this.  For this specific case
it's still system memory, reserved by the BIOS and that we access by
mapping the lmembar

> 		return 0;
>
> 	/*
>@@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>
> 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
>
>-	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>-
> 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
> 	case GEN8_STOLEN_RESERVED_1M:
> 		*size = 1024 * 1024;
>@@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
> 		*size = 8 * 1024 * 1024;
> 		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
> 	}
>+
>+	if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
>+		/* the base is initialized to stolen top so subtract size to get base */
>+		*base -= *size;

that doesn't necessarily holds true.  According to the spec the wopcm
base is 1MB aligned so even if it is "at the top", it may not mean it is at the
very very top that we can just subtract the size.


>+	else
>+		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
> }
>
> static int i915_gem_init_stolen(struct intel_memory_region *mem)
>@@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
> 	if (i915_adjust_stolen(i915, &i915->dsm))
> 		return 0;
>
>-	GEM_BUG_ON(i915->dsm.start == 0);
>-	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
>+	GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
>
> 	stolen_top = i915->dsm.end + 1;
> 	reserved_base = stolen_top;
>@@ -796,6 +812,46 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
> 	.init_object = _i915_gem_object_stolen_init,
> };
>
>+static int get_mtl_gms_size(struct intel_uncore *uncore)
>+{
>+	u16 ggc, gms;
>+
>+	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));

??

>+
>+	/* check GGMS, should be fixed 0x3 (8MB) */
>+	if ((ggc & 0xc0) != 0xc0)
>+		return -EIO;
>+
>+	/* return valid GMS value, -EIO if invalid */
>+	gms = ggc >> 8;
>+	switch (gms) {
>+	case 0x0 ... 0x10:
>+		return gms * 32;
>+	case 0x20:
>+		return 1024;
>+	case 0x30:
>+		return 1536;
>+	case 0x40:
>+		return 2048;
>+	case 0xf0 ... 0xfe:
>+		return (gms - 0xf0 + 1) * 4;
>+	default:
>+		return -EIO;
>+	}
>+}
>+
>+static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)

doesn't deserve an inline. lmembar_is_igpu_stolen() doesn't make much
sense as the lmembar is never igpu stolen.... you probably mean
something else here

>+{
>+	u32 regions = RUNTIME_INFO(i915)->memory_regions;
>+
>+	if (regions & REGION_LMEM)
>+		return false;
>+
>+	drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
>+
>+	return true;
>+}
>+
> struct intel_memory_region *
> i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> 			   u16 instance)
>@@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> 	struct intel_memory_region *mem;
> 	resource_size_t io_start, io_size;
> 	resource_size_t min_page_size;
>+	int ret;
>
> 	if (WARN_ON_ONCE(instance))
> 		return ERR_PTR(-ENODEV);
>
>-	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
>+	if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))

at least for MTL, Bspec 63830 still calls this lmembar. So the rename
for me is a net loss

> 		return ERR_PTR(-ENXIO);
>
>-	/* Use DSM base address instead for stolen memory */
>-	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>-	if (IS_DG1(uncore->i915)) {
>-		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
>-		if (WARN_ON(lmem_size < dsm_base))
>-			return ERR_PTR(-ENODEV);
>+	if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
>+		lmem_size = pci_resource_len(pdev, GFXMEM_BAR);

this looks confusing, but apparently correct. For DG1 the stolen is
on top of lmem. For MTL, it's on the end of lmembar (256M). This works
because on DG1 aperture == lmem size.

> 	} else {
> 		resource_size_t lmem_range;
>
>@@ -827,13 +880,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> 		lmem_size *= SZ_1G;
> 	}
>
>-	dsm_size = lmem_size - dsm_base;
>-	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>+	if (HAS_BAR2_SMEM_STOLEN(i915)) {
>+		/*
>+		 * MTL dsm size is in GGC register, not the bar size.

it's not exclusive to MTL. it has been there for ages and it was never
the BAR size like this comment says. Or at least it doesn't match the
else condition that is using the GEN12_DSMBASE register

>+		 * also MTL uses offset to DSMBASE in ptes, so i915
>+		 * uses dsm_base = 0 to setup stolen region.
>+		 */
>+		ret = get_mtl_gms_size(uncore);
>+		if (ret < 0) {
>+			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
>+			return ERR_PTR(ret);
>+		}
>+
>+		dsm_base = 0;

if we stop handling part of the values in the registers as relative to
the mapping and rather handle them as we read from the registers
(physical addresses), the size calculations should still match and we
shouldn't need all the if/else dance. If we pass the right io_start we
can then make them relative to the mapping by subtracting it, or if we
don't want GTT to be in the mapping we subtract it.

That makes me wonder if choosing the i915_gem_stolen_lmem_setup() for
all of this is even the right choice given we are actually talking about
system memory that is mapped through the lmembar.

>+		dsm_size = (resource_size_t)(ret * SZ_1M);
>+
>+		GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
>+		GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
>+	} else {
>+		/* Use DSM base address instead for stolen memory */
>+		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>+		if (WARN_ON(lmem_size < dsm_base))
>+			return ERR_PTR(-ENODEV);
>+		dsm_size = lmem_size - dsm_base;
>+	}
>+
>+	io_size = dsm_size;
>+	if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
> 		io_start = 0;
> 		io_size = 0;
>+	} else if (HAS_BAR2_SMEM_STOLEN(i915)) {
>+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;

should be the GGSM?


Lucas De Marchi

> 	} else {
>-		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
>-		io_size = dsm_size;
>+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
> 	}
>
> 	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
>diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>index 30cf5c3369d9..b31fe0fb013f 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>@@ -931,7 +931,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> 	unsigned int size;
> 	u16 snb_gmch_ctl;
>
>-	if (!HAS_LMEM(i915)) {
>+	if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
> 		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
> 			return -ENXIO;
>
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 134fc1621821..ef3120322077 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -973,6 +973,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>
> #define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
>
>+#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>+				    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>+
> /* intel_device_info.c */
> static inline struct intel_device_info *
> mkwrite_device_info(struct drm_i915_private *dev_priv)
>-- 
>2.25.1
>
Andi Shyti Sept. 20, 2022, 4:57 p.m. UTC | #3
Hi Aravind,

> +static int get_mtl_gms_size(struct intel_uncore *uncore)
> +{
> +	u16 ggc, gms;
> +
> +	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
> +
> +	/* check GGMS, should be fixed 0x3 (8MB) */
> +	if ((ggc & 0xc0) != 0xc0)
> +		return -EIO;
> +
> +	/* return valid GMS value, -EIO if invalid */
> +	gms = ggc >> 8;
> +	switch (gms) {
> +	case 0x0 ... 0x10:
> +		return gms * 32;
> +	case 0x20:
> +		return 1024;
> +	case 0x30:
> +		return 1536;
> +	case 0x40:
> +		return 2048;
> +	case 0xf0 ... 0xfe:

just a bit puzzled by the fact that case ranges are not standard
and are supported only by GCC, unless, of course, I miss
something. Do we still want to use them as they are widely used
around the kernel?

Andi

> +		return (gms - 0xf0 + 1) * 4;
> +	default:
> +		return -EIO;
> +	}
> +}
Lucas De Marchi Sept. 20, 2022, 8:01 p.m. UTC | #4
On Tue, Sep 20, 2022 at 06:57:46PM +0200, Andi Shyti wrote:
>Hi Aravind,
>
>> +static int get_mtl_gms_size(struct intel_uncore *uncore)
>> +{
>> +	u16 ggc, gms;
>> +
>> +	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
>> +
>> +	/* check GGMS, should be fixed 0x3 (8MB) */
>> +	if ((ggc & 0xc0) != 0xc0)
>> +		return -EIO;
>> +
>> +	/* return valid GMS value, -EIO if invalid */
>> +	gms = ggc >> 8;
>> +	switch (gms) {
>> +	case 0x0 ... 0x10:
>> +		return gms * 32;
>> +	case 0x20:
>> +		return 1024;
>> +	case 0x30:
>> +		return 1536;
>> +	case 0x40:
>> +		return 2048;
>> +	case 0xf0 ... 0xfe:
>
>just a bit puzzled by the fact that case ranges are not standard
>and are supported only by GCC, unless, of course, I miss

clang also supports it and can build the kernel (or a great portion of
it).

Lucas De Marchi
Lucas De Marchi Sept. 20, 2022, 8:05 p.m. UTC | #5
On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote:
>On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote:
>>As an integrated GPU, MTL does not have local memory and
>>HAS_LMEM() returns false.  However the platform's stolen memory
>>is presented via BAR2 (i.e., the BAR we traditionally consider
>>to be the LMEM BAR) and should be managed by the driver the same
>>way that local memory is on dgpu platforms (which includes
>>setting the "lmem" bit on page table entries).  We use the term
>>"local stolen memory" to refer to this model.
>>
>>Cc: Matt Roper <matthew.d.roper@intel.com>
>>Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>
>>Signed-off-by: CQ Tang <cq.tang@intel.com>
>>Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>Original-author: CQ Tang
>>---
>>drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>>drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>>drivers/gpu/drm/i915/i915_drv.h            |   3 +
>>3 files changed, 100 insertions(+), 18 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>index acc561c0f0aa..bad5250fb764 100644
>>--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>@@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
>>	mutex_unlock(&i915->mm.stolen_lock);
>>}
>>
>>+static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)
>>+{
>>+	if (!HAS_BAR2_SMEM_STOLEN(i915)) {
>
>I called a similar function as is_dsm_valid() in
>https://patchwork.freedesktop.org/series/108620/
>
>sounds weird  with "invalid" and the double negation on return early
>style.
>
>>+		if (dsm->start == 0)
>>+			return true;
>>+	}
>>+
>>+	if (dsm->end <= dsm->start)
>>+		return true;
>>+
>>+	return false;
>>+}
>>+
>>static int i915_adjust_stolen(struct drm_i915_private *i915,
>>			      struct resource *dsm)
>>{
>>@@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>>	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>	struct resource *r;
>>
>>-	if (dsm->start == 0 || dsm->end <= dsm->start)
>>+	if (is_dsm_invalid(i915, dsm))
>>		return -EINVAL;
>>
>>	/*
>>@@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>>	 * overlaps with the non-stolen system memory range, since lmem is local
>>	 * to the gpu.
>>	 */
>>-	if (HAS_LMEM(i915))
>>+	if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
>
>comment above makes no sense when you add this.  For this specific case
>it's still system memory, reserved by the BIOS and that we access by
>mapping the lmembar
>
>>		return 0;
>>
>>	/*
>>@@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>>
>>	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
>>
>>-	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>>-
>>	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
>>	case GEN8_STOLEN_RESERVED_1M:
>>		*size = 1024 * 1024;
>>@@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>>		*size = 8 * 1024 * 1024;
>>		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
>>	}
>>+
>>+	if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
>>+		/* the base is initialized to stolen top so subtract size to get base */
>>+		*base -= *size;
>
>that doesn't necessarily holds true.  According to the spec the wopcm
>base is 1MB aligned so even if it is "at the top", it may not mean it is at the
>very very top that we can just subtract the size.
>
>
>>+	else
>>+		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>>}
>>
>>static int i915_gem_init_stolen(struct intel_memory_region *mem)
>>@@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
>>	if (i915_adjust_stolen(i915, &i915->dsm))
>>		return 0;
>>
>>-	GEM_BUG_ON(i915->dsm.start == 0);
>>-	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
>>+	GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
>>
>>	stolen_top = i915->dsm.end + 1;
>>	reserved_base = stolen_top;
>>@@ -796,6 +812,46 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
>>	.init_object = _i915_gem_object_stolen_init,
>>};
>>
>>+static int get_mtl_gms_size(struct intel_uncore *uncore)
>>+{
>>+	u16 ggc, gms;
>>+
>>+	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
>
>??
>
>>+
>>+	/* check GGMS, should be fixed 0x3 (8MB) */
>>+	if ((ggc & 0xc0) != 0xc0)
>>+		return -EIO;
>>+
>>+	/* return valid GMS value, -EIO if invalid */
>>+	gms = ggc >> 8;
>>+	switch (gms) {
>>+	case 0x0 ... 0x10:
>>+		return gms * 32;
>>+	case 0x20:
>>+		return 1024;
>>+	case 0x30:
>>+		return 1536;
>>+	case 0x40:
>>+		return 2048;
>>+	case 0xf0 ... 0xfe:
>>+		return (gms - 0xf0 + 1) * 4;
>>+	default:
>>+		return -EIO;
>>+	}
>>+}
>>+
>>+static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)
>
>doesn't deserve an inline. lmembar_is_igpu_stolen() doesn't make much
>sense as the lmembar is never igpu stolen.... you probably mean
>something else here
>
>>+{
>>+	u32 regions = RUNTIME_INFO(i915)->memory_regions;
>>+
>>+	if (regions & REGION_LMEM)
>>+		return false;
>>+
>>+	drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
>>+
>>+	return true;
>>+}
>>+
>>struct intel_memory_region *
>>i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>			   u16 instance)
>>@@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>	struct intel_memory_region *mem;
>>	resource_size_t io_start, io_size;
>>	resource_size_t min_page_size;
>>+	int ret;
>>
>>	if (WARN_ON_ONCE(instance))
>>		return ERR_PTR(-ENODEV);
>>
>>-	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
>>+	if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))
>
>at least for MTL, Bspec 63830 still calls this lmembar. So the rename
>for me is a net loss
>
>>		return ERR_PTR(-ENXIO);
>>
>>-	/* Use DSM base address instead for stolen memory */
>>-	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>>-	if (IS_DG1(uncore->i915)) {
>>-		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
>>-		if (WARN_ON(lmem_size < dsm_base))
>>-			return ERR_PTR(-ENODEV);
>>+	if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
>>+		lmem_size = pci_resource_len(pdev, GFXMEM_BAR);
>
>this looks confusing, but apparently correct. For DG1 the stolen is
>on top of lmem. For MTL, it's on the end of lmembar (256M). This works
>because on DG1 aperture == lmem size.
>
>>	} else {
>>		resource_size_t lmem_range;
>>
>>@@ -827,13 +880,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>		lmem_size *= SZ_1G;
>>	}
>>
>>-	dsm_size = lmem_size - dsm_base;
>>-	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>>+	if (HAS_BAR2_SMEM_STOLEN(i915)) {
>>+		/*
>>+		 * MTL dsm size is in GGC register, not the bar size.
>
>it's not exclusive to MTL. it has been there for ages and it was never
>the BAR size like this comment says. Or at least it doesn't match the
>else condition that is using the GEN12_DSMBASE register
>
>>+		 * also MTL uses offset to DSMBASE in ptes, so i915
>>+		 * uses dsm_base = 0 to setup stolen region.
>>+		 */
>>+		ret = get_mtl_gms_size(uncore);
>>+		if (ret < 0) {
>>+			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
>>+			return ERR_PTR(ret);
>>+		}
>>+
>>+		dsm_base = 0;
>
>if we stop handling part of the values in the registers as relative to
>the mapping and rather handle them as we read from the registers
>(physical addresses), the size calculations should still match and we
>shouldn't need all the if/else dance. If we pass the right io_start we
>can then make them relative to the mapping by subtracting it, or if we
>don't want GTT to be in the mapping we subtract it.
>
>That makes me wonder if choosing the i915_gem_stolen_lmem_setup() for
>all of this is even the right choice given we are actually talking about
>system memory that is mapped through the lmembar.


another approach could be:  handle the several coding style issues,
function names etc. Since this works for MTL, it's better than the
hypothetical solution that is not written and may not work well. I may
have missed something in my analysis. So I'd be fine to go with improved
version of this patch, and I can add to my todo list to try to clean
this up later.

Lucas De Marchi


>
>>+		dsm_size = (resource_size_t)(ret * SZ_1M);
>>+
>>+		GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
>>+		GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
>>+	} else {
>>+		/* Use DSM base address instead for stolen memory */
>>+		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>>+		if (WARN_ON(lmem_size < dsm_base))
>>+			return ERR_PTR(-ENODEV);
>>+		dsm_size = lmem_size - dsm_base;
>>+	}
>>+
>>+	io_size = dsm_size;
>>+	if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
>>		io_start = 0;
>>		io_size = 0;
>>+	} else if (HAS_BAR2_SMEM_STOLEN(i915)) {
>>+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;
>
>should be the GGSM?
>
>
>Lucas De Marchi
>
>>	} else {
>>-		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
>>-		io_size = dsm_size;
>>+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
>>	}
>>
>>	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
>>diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>index 30cf5c3369d9..b31fe0fb013f 100644
>>--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>@@ -931,7 +931,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>	unsigned int size;
>>	u16 snb_gmch_ctl;
>>
>>-	if (!HAS_LMEM(i915)) {
>>+	if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
>>		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
>>			return -ENXIO;
>>
>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>index 134fc1621821..ef3120322077 100644
>>--- a/drivers/gpu/drm/i915/i915_drv.h
>>+++ b/drivers/gpu/drm/i915/i915_drv.h
>>@@ -973,6 +973,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>>
>>#define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
>>
>>+#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>>+				    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>+
>>/* intel_device_info.c */
>>static inline struct intel_device_info *
>>mkwrite_device_info(struct drm_i915_private *dev_priv)
>>-- 
>>2.25.1
>>
Matt Roper Sept. 20, 2022, 8:40 p.m. UTC | #6
On Tue, Sep 20, 2022 at 06:57:46PM +0200, Andi Shyti wrote:
> Hi Aravind,
> 
> > +static int get_mtl_gms_size(struct intel_uncore *uncore)
> > +{
> > +	u16 ggc, gms;
> > +
> > +	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
> > +
> > +	/* check GGMS, should be fixed 0x3 (8MB) */
> > +	if ((ggc & 0xc0) != 0xc0)
> > +		return -EIO;
> > +
> > +	/* return valid GMS value, -EIO if invalid */
> > +	gms = ggc >> 8;
> > +	switch (gms) {
> > +	case 0x0 ... 0x10:
> > +		return gms * 32;
> > +	case 0x20:
> > +		return 1024;
> > +	case 0x30:
> > +		return 1536;
> > +	case 0x40:
> > +		return 2048;
> > +	case 0xf0 ... 0xfe:
> 
> just a bit puzzled by the fact that case ranges are not standard
> and are supported only by GCC, unless, of course, I miss
> something. Do we still want to use them as they are widely used
> around the kernel?

i915 already has 17 other uses of this notation and the DRM subsystem in
general has about 50 today.  So it's not super common, but I think it
should be okay to use.  I believe clang supports this language extension
as well and the coding style doc doesn't say anything one way or the
other.


Matt

> 
> Andi
> 
> > +		return (gms - 0xf0 + 1) * 4;
> > +	default:
> > +		return -EIO;
> > +	}
> > +}
Andi Shyti Sept. 20, 2022, 10:33 p.m. UTC | #7
Hi Matt, Lucas,

thanks for your feedback,

> > > +	switch (gms) {
> > > +	case 0x0 ... 0x10:
> > > +		return gms * 32;
> > > +	case 0x20:
> > > +		return 1024;
> > > +	case 0x30:
> > > +		return 1536;
> > > +	case 0x40:
> > > +		return 2048;
> > > +	case 0xf0 ... 0xfe:
> > 
> > just a bit puzzled by the fact that case ranges are not standard
> > and are supported only by GCC, unless, of course, I miss
> > something. Do we still want to use them as they are widely used
> > around the kernel?
> 
> i915 already has 17 other uses of this notation and the DRM subsystem in
> general has about 50 today.  So it's not super common, but I think it
> should be okay to use.  I believe clang supports this language extension
> as well and the coding style doc doesn't say anything one way or the
> other.

I thought clang supports it for C++ rather than C, but I'm not a
clang expert, so that I might be wrong.

The fact that it is widely used is never a convincing argument
and if gcc supports it, then it does it against the standard,
both C11 and C17 (this is what puzzles me everytime I see these C
tricks offered by gcc).

Anyway, I'm not against it (nor in favour) as the ellipsis is
becoming very commonly used. My comment, by the way, did not mean
to block the patch, I just wanted to check what other people
think about.

Thanks,
Andi
Iddamsetty, Aravind Sept. 21, 2022, 6:30 a.m. UTC | #8
replying here for earlier comments too.

On 21-09-2022 01:35, Lucas De Marchi wrote:
> On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote:
>> On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote:
>>> As an integrated GPU, MTL does not have local memory and
>>> HAS_LMEM() returns false.  However the platform's stolen memory
>>> is presented via BAR2 (i.e., the BAR we traditionally consider
>>> to be the LMEM BAR) and should be managed by the driver the same
>>> way that local memory is on dgpu platforms (which includes
>>> setting the "lmem" bit on page table entries).  We use the term
>>> "local stolen memory" to refer to this model.
>>>
>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>
>>> Signed-off-by: CQ Tang <cq.tang@intel.com>
>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>> Original-author: CQ Tang
>>> ---
>>> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>>> drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>>> drivers/gpu/drm/i915/i915_drv.h            |   3 +
>>> 3 files changed, 100 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> index acc561c0f0aa..bad5250fb764 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct
>>> drm_i915_private *i915,
>>>     mutex_unlock(&i915->mm.stolen_lock);
>>> }
>>>
>>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct
>>> resource *dsm)
>>> +{
>>> +    if (!HAS_BAR2_SMEM_STOLEN(i915)) {
>>
>> I called a similar function as is_dsm_valid() in
>> https://patchwork.freedesktop.org/series/108620/
>>
>> sounds weird  with "invalid" and the double negation on return early
>> style.

sure, will change it hope i can use that from your patch.
>>
>>> +        if (dsm->start == 0)
>>> +            return true;
>>> +    }
>>> +
>>> +    if (dsm->end <= dsm->start)
>>> +        return true;
>>> +
>>> +    return false;
>>> +}
>>> +
>>> static int i915_adjust_stolen(struct drm_i915_private *i915,
>>>                   struct resource *dsm)
>>> {
>>> @@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct
>>> drm_i915_private *i915,
>>>     struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>>     struct resource *r;
>>>
>>> -    if (dsm->start == 0 || dsm->end <= dsm->start)
>>> +    if (is_dsm_invalid(i915, dsm))
>>>         return -EINVAL;
>>>
>>>     /*
>>> @@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct
>>> drm_i915_private *i915,
>>>      * overlaps with the non-stolen system memory range, since lmem
>>> is local
>>>      * to the gpu.
>>>      */
>>> -    if (HAS_LMEM(i915))
>>> +    if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
>>
>> comment above makes no sense when you add this.  For this specific case
>> it's still system memory, reserved by the BIOS and that we access by
>> mapping the lmembar

thanks for catching this.
>>
>>>         return 0;
>>>
>>>     /*
>>> @@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct
>>> drm_i915_private *i915,
>>>
>>>     drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
>>>
>>> -    *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>>> -
>>>     switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
>>>     case GEN8_STOLEN_RESERVED_1M:
>>>         *size = 1024 * 1024;
>>> @@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct
>>> drm_i915_private *i915,
>>>         *size = 8 * 1024 * 1024;
>>>         MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
>>>     }
>>> +
>>> +    if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
>>> +        /* the base is initialized to stolen top so subtract size to
>>> get base */
>>> +        *base -= *size;
>>
>> that doesn't necessarily holds true.  According to the spec the wopcm
>> base is 1MB aligned so even if it is "at the top", it may not mean it
>> is at the
>> very very top that we can just subtract the size.

well here the reserved_base is not to the top of the BAR, we got the
stolen size from GGC register so base is initialized to end of that
stolen size. hence we subtract the reserved size from it.

>>
>>
>>> +    else
>>> +        *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>>> }
>>>
>>> static int i915_gem_init_stolen(struct intel_memory_region *mem)
>>> @@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct
>>> intel_memory_region *mem)
>>>     if (i915_adjust_stolen(i915, &i915->dsm))
>>>         return 0;
>>>
>>> -    GEM_BUG_ON(i915->dsm.start == 0);
>>> -    GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
>>> +    GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
>>>
>>>     stolen_top = i915->dsm.end + 1;
>>>     reserved_base = stolen_top;
>>> @@ -796,6 +812,46 @@ static const struct intel_memory_region_ops
>>> i915_region_stolen_lmem_ops = {
>>>     .init_object = _i915_gem_object_stolen_init,
>>> };
>>>
>>> +static int get_mtl_gms_size(struct intel_uncore *uncore)
>>> +{
>>> +    u16 ggc, gms;
>>> +
>>> +    ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
>>
>> ??
>>
>>> +
>>> +    /* check GGMS, should be fixed 0x3 (8MB) */
>>> +    if ((ggc & 0xc0) != 0xc0)
>>> +        return -EIO;
>>> +
>>> +    /* return valid GMS value, -EIO if invalid */
>>> +    gms = ggc >> 8;
>>> +    switch (gms) {
>>> +    case 0x0 ... 0x10:
>>> +        return gms * 32;
>>> +    case 0x20:
>>> +        return 1024;
>>> +    case 0x30:
>>> +        return 1536;
>>> +    case 0x40:
>>> +        return 2048;
>>> +    case 0xf0 ... 0xfe:
>>> +        return (gms - 0xf0 + 1) * 4;
>>> +    default:
>>> +        return -EIO;
>>> +    }
>>> +}
>>> +
>>> +static inline bool lmembar_is_igpu_stolen(struct drm_i915_private
>>> *i915)
>>
>> doesn't deserve an inline. lmembar_is_igpu_stolen() doesn't make much
>> sense as the lmembar is never igpu stolen.... you probably mean
>> something else here
here the intent was to check the second BAR which is traditionally LMEM
is to be considered STOLEN, if the memory regions consists STOLEN_LMEM
and not LMEM. But I think i can rather replace this entire function and
just use HAS_LMEM check.
>>
>>> +{
>>> +    u32 regions = RUNTIME_INFO(i915)->memory_regions;
>>> +
>>> +    if (regions & REGION_LMEM)
>>> +        return false;
>>> +
>>> +    drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
>>> +
>>> +    return true;
>>> +}
>>> +
>>> struct intel_memory_region *
>>> i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>>                u16 instance)
>>> @@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct
>>> drm_i915_private *i915, u16 type,
>>>     struct intel_memory_region *mem;
>>>     resource_size_t io_start, io_size;
>>>     resource_size_t min_page_size;
>>> +    int ret;
>>>
>>>     if (WARN_ON_ONCE(instance))
>>>         return ERR_PTR(-ENODEV);
>>>
>>> -    if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
>>> +    if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))
>>
>> at least for MTL, Bspec 63830 still calls this lmembar. So the rename
>> for me is a net loss
that is right, will revert this.
>>
>>>         return ERR_PTR(-ENXIO);
>>>
>>> -    /* Use DSM base address instead for stolen memory */
>>> -    dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>>> -    if (IS_DG1(uncore->i915)) {
>>> -        lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
>>> -        if (WARN_ON(lmem_size < dsm_base))
>>> -            return ERR_PTR(-ENODEV);
>>> +    if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
>>> +        lmem_size = pci_resource_len(pdev, GFXMEM_BAR);
>>
>> this looks confusing, but apparently correct. For DG1 the stolen is
>> on top of lmem. For MTL, it's on the end of lmembar (256M). This works
>> because on DG1 aperture == lmem size.
>>
>>>     } else {
>>>         resource_size_t lmem_range;
>>>
>>> @@ -827,13 +880,39 @@ i915_gem_stolen_lmem_setup(struct
>>> drm_i915_private *i915, u16 type,
>>>         lmem_size *= SZ_1G;
>>>     }
>>>
>>> -    dsm_size = lmem_size - dsm_base;
>>> -    if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>>> +    if (HAS_BAR2_SMEM_STOLEN(i915)) {
>>> +        /*
>>> +         * MTL dsm size is in GGC register, not the bar size.
>>
>> it's not exclusive to MTL. it has been there for ages and it was never
>> the BAR size like this comment says. Or at least it doesn't match the
>> else condition that is using the GEN12_DSMBASE register
>>
>>> +         * also MTL uses offset to DSMBASE in ptes, so i915
>>> +         * uses dsm_base = 0 to setup stolen region.
>>> +         */
>>> +        ret = get_mtl_gms_size(uncore);
>>> +        if (ret < 0) {
>>> +            drm_err(&i915->drm, "invalid MTL GGC register setting\n");
>>> +            return ERR_PTR(ret);
>>> +        }
>>> +
>>> +        dsm_base = 0;
>>
>> if we stop handling part of the values in the registers as relative to
>> the mapping and rather handle them as we read from the registers
>> (physical addresses), the size calculations should still match and we
>> shouldn't need all the if/else dance. If we pass the right io_start we
>> can then make them relative to the mapping by subtracting it, or if we
>> don't want GTT to be in the mapping we subtract it.
>>
>> That makes me wonder if choosing the i915_gem_stolen_lmem_setup() for
>> all of this is even the right choice given we are actually talking about
>> system memory that is mapped through the lmembar.
> 
> 
> another approach could be:  handle the several coding style issues,
> function names etc. Since this works for MTL, it's better than the
> hypothetical solution that is not written and may not work well. I may
> have missed something in my analysis. So I'd be fine to go with improved
> version of this patch, and I can add to my todo list to try to clean
> this up later.

Thanks for that.

> 
> Lucas De Marchi
> 
> 
>>
>>> +        dsm_size = (resource_size_t)(ret * SZ_1M);
>>> +
>>> +        GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
>>> +        GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
>>> +    } else {
>>> +        /* Use DSM base address instead for stolen memory */
>>> +        dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>>> +        if (WARN_ON(lmem_size < dsm_base))
>>> +            return ERR_PTR(-ENODEV);
>>> +        dsm_size = lmem_size - dsm_base;
>>> +    }
>>> +
>>> +    io_size = dsm_size;
>>> +    if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
>>>         io_start = 0;
>>>         io_size = 0;
>>> +    } else if (HAS_BAR2_SMEM_STOLEN(i915)) {
>>> +        io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;
>>
>> should be the GGSM?
make sense can be derived from GGC registered.

Thanks,
Aravind.
>>
>>
>> Lucas De Marchi
>>
>>>     } else {
>>> -        io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
>>> -        io_size = dsm_size;
>>> +        io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
>>>     }
>>>
>>>     min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> index 30cf5c3369d9..b31fe0fb013f 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> @@ -931,7 +931,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>     unsigned int size;
>>>     u16 snb_gmch_ctl;
>>>
>>> -    if (!HAS_LMEM(i915)) {
>>> +    if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
>>>         if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
>>>             return -ENXIO;
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 134fc1621821..ef3120322077 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -973,6 +973,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>>>
>>> #define HAS_ONE_EU_PER_FUSE_BIT(i915)   
>>> (INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
>>>
>>> +#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>>> +                    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>> +
>>> /* intel_device_info.c */
>>> static inline struct intel_device_info *
>>> mkwrite_device_info(struct drm_i915_private *dev_priv)
>>> -- 
>>> 2.25.1
>>>
Iddamsetty, Aravind Sept. 21, 2022, 6:32 a.m. UTC | #9
On 20-09-2022 13:05, Jani Nikula wrote:
> On Tue, 20 Sep 2022, Aravind Iddamsetty <aravind.iddamsetty@intel.com> wrote:
>> As an integrated GPU, MTL does not have local memory and
>> HAS_LMEM() returns false.  However the platform's stolen memory
>> is presented via BAR2 (i.e., the BAR we traditionally consider
>> to be the LMEM BAR) and should be managed by the driver the same
>> way that local memory is on dgpu platforms (which includes
>> setting the "lmem" bit on page table entries).  We use the term
>> "local stolen memory" to refer to this model.
>>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>
>> Signed-off-by: CQ Tang <cq.tang@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> Original-author: CQ Tang
>> ---
>>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>>  drivers/gpu/drm/i915/i915_drv.h            |   3 +
>>  3 files changed, 100 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> index acc561c0f0aa..bad5250fb764 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
>>  	mutex_unlock(&i915->mm.stolen_lock);
>>  }
>>  
>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)
> 
> Abstracting this as a separate function looks like a separate patch.
> 
> I generally recommend using positive naming, "is dsm valid", to avoid
> any double negatives that might pop up, now or in the
> future. !is_dsm_invalid() gets slower for human brains to parse.
sure will change it.

Thanks,
Aravind.
> 
> BR,
> Jani.
> 
> 
>> +{
>> +	if (!HAS_BAR2_SMEM_STOLEN(i915)) {
>> +		if (dsm->start == 0)
>> +			return true;
>> +	}
>> +
>> +	if (dsm->end <= dsm->start)
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>>  static int i915_adjust_stolen(struct drm_i915_private *i915,
>>  			      struct resource *dsm)
>>  {
>> @@ -84,7 +97,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>>  	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>  	struct resource *r;
>>  
>> -	if (dsm->start == 0 || dsm->end <= dsm->start)
>> +	if (is_dsm_invalid(i915, dsm))
>>  		return -EINVAL;
>>  
>>  	/*
>> @@ -136,7 +149,7 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
>>  	 * overlaps with the non-stolen system memory range, since lmem is local
>>  	 * to the gpu.
>>  	 */
>> -	if (HAS_LMEM(i915))
>> +	if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
>>  		return 0;
>>  
>>  	/*
>> @@ -371,8 +384,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>>  
>>  	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
>>  
>> -	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>> -
>>  	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
>>  	case GEN8_STOLEN_RESERVED_1M:
>>  		*size = 1024 * 1024;
>> @@ -390,6 +401,12 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
>>  		*size = 8 * 1024 * 1024;
>>  		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
>>  	}
>> +
>> +	if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
>> +		/* the base is initialized to stolen top so subtract size to get base */
>> +		*base -= *size;
>> +	else
>> +		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
>>  }
>>  
>>  static int i915_gem_init_stolen(struct intel_memory_region *mem)
>> @@ -423,8 +440,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
>>  	if (i915_adjust_stolen(i915, &i915->dsm))
>>  		return 0;
>>  
>> -	GEM_BUG_ON(i915->dsm.start == 0);
>> -	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
>> +	GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
>>  
>>  	stolen_top = i915->dsm.end + 1;
>>  	reserved_base = stolen_top;
>> @@ -796,6 +812,46 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
>>  	.init_object = _i915_gem_object_stolen_init,
>>  };
>>  
>> +static int get_mtl_gms_size(struct intel_uncore *uncore)
>> +{
>> +	u16 ggc, gms;
>> +
>> +	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
>> +
>> +	/* check GGMS, should be fixed 0x3 (8MB) */
>> +	if ((ggc & 0xc0) != 0xc0)
>> +		return -EIO;
>> +
>> +	/* return valid GMS value, -EIO if invalid */
>> +	gms = ggc >> 8;
>> +	switch (gms) {
>> +	case 0x0 ... 0x10:
>> +		return gms * 32;
>> +	case 0x20:
>> +		return 1024;
>> +	case 0x30:
>> +		return 1536;
>> +	case 0x40:
>> +		return 2048;
>> +	case 0xf0 ... 0xfe:
>> +		return (gms - 0xf0 + 1) * 4;
>> +	default:
>> +		return -EIO;
>> +	}
>> +}
>> +
>> +static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)
>> +{
>> +	u32 regions = RUNTIME_INFO(i915)->memory_regions;
>> +
>> +	if (regions & REGION_LMEM)
>> +		return false;
>> +
>> +	drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
>> +
>> +	return true;
>> +}
>> +
>>  struct intel_memory_region *
>>  i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>  			   u16 instance)
>> @@ -806,19 +862,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>  	struct intel_memory_region *mem;
>>  	resource_size_t io_start, io_size;
>>  	resource_size_t min_page_size;
>> +	int ret;
>>  
>>  	if (WARN_ON_ONCE(instance))
>>  		return ERR_PTR(-ENODEV);
>>  
>> -	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
>> +	if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))
>>  		return ERR_PTR(-ENXIO);
>>  
>> -	/* Use DSM base address instead for stolen memory */
>> -	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>> -	if (IS_DG1(uncore->i915)) {
>> -		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
>> -		if (WARN_ON(lmem_size < dsm_base))
>> -			return ERR_PTR(-ENODEV);
>> +	if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
>> +		lmem_size = pci_resource_len(pdev, GFXMEM_BAR);
>>  	} else {
>>  		resource_size_t lmem_range;
>>  
>> @@ -827,13 +880,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>  		lmem_size *= SZ_1G;
>>  	}
>>  
>> -	dsm_size = lmem_size - dsm_base;
>> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>> +	if (HAS_BAR2_SMEM_STOLEN(i915)) {
>> +		/*
>> +		 * MTL dsm size is in GGC register, not the bar size.
>> +		 * also MTL uses offset to DSMBASE in ptes, so i915
>> +		 * uses dsm_base = 0 to setup stolen region.
>> +		 */
>> +		ret = get_mtl_gms_size(uncore);
>> +		if (ret < 0) {
>> +			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
>> +			return ERR_PTR(ret);
>> +		}
>> +
>> +		dsm_base = 0;
>> +		dsm_size = (resource_size_t)(ret * SZ_1M);
>> +
>> +		GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
>> +		GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
>> +	} else {
>> +		/* Use DSM base address instead for stolen memory */
>> +		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
>> +		if (WARN_ON(lmem_size < dsm_base))
>> +			return ERR_PTR(-ENODEV);
>> +		dsm_size = lmem_size - dsm_base;
>> +	}
>> +
>> +	io_size = dsm_size;
>> +	if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
>>  		io_start = 0;
>>  		io_size = 0;
>> +	} else if (HAS_BAR2_SMEM_STOLEN(i915)) {
>> +		io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;
>>  	} else {
>> -		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
>> -		io_size = dsm_size;
>> +		io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
>>  	}
>>  
>>  	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 30cf5c3369d9..b31fe0fb013f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -931,7 +931,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>  	unsigned int size;
>>  	u16 snb_gmch_ctl;
>>  
>> -	if (!HAS_LMEM(i915)) {
>> +	if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
>>  		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
>>  			return -ENXIO;
>>  
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 134fc1621821..ef3120322077 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -973,6 +973,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>>  
>>  #define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
>>  
>> +#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>> +				    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +
>>  /* intel_device_info.c */
>>  static inline struct intel_device_info *
>>  mkwrite_device_info(struct drm_i915_private *dev_priv)
>
Lucas De Marchi Sept. 22, 2022, 1:56 p.m. UTC | #10
On Wed, Sep 21, 2022 at 12:00:38PM +0530, Iddamsetty, Aravind wrote:
>replying here for earlier comments too.
>
>On 21-09-2022 01:35, Lucas De Marchi wrote:
>> On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote:
>>> On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote:
>>>> As an integrated GPU, MTL does not have local memory and
>>>> HAS_LMEM() returns false.  However the platform's stolen memory
>>>> is presented via BAR2 (i.e., the BAR we traditionally consider
>>>> to be the LMEM BAR) and should be managed by the driver the same
>>>> way that local memory is on dgpu platforms (which includes
>>>> setting the "lmem" bit on page table entries).  We use the term
>>>> "local stolen memory" to refer to this model.
>>>>
>>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>>
>>>> Signed-off-by: CQ Tang <cq.tang@intel.com>
>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>>> Original-author: CQ Tang
>>>> ---
>>>> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>>>> drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>>>> drivers/gpu/drm/i915/i915_drv.h            |   3 +
>>>> 3 files changed, 100 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>> index acc561c0f0aa..bad5250fb764 100644
>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct
>>>> drm_i915_private *i915,
>>>>     mutex_unlock(&i915->mm.stolen_lock);
>>>> }
>>>>
>>>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct
>>>> resource *dsm)
>>>> +{
>>>> +    if (!HAS_BAR2_SMEM_STOLEN(i915)) {
>>>
>>> I called a similar function as is_dsm_valid() in
>>> https://patchwork.freedesktop.org/series/108620/
>>>
>>> sounds weird  with "invalid" and the double negation on return early
>>> style.
>
>sure, will change it hope i can use that from your patch.

that patch is now pushed, so now you can reuse it.

Lucas De Marchi
Iddamsetty, Aravind Sept. 22, 2022, 3:56 p.m. UTC | #11
On 22-09-2022 19:26, Lucas De Marchi wrote:
> On Wed, Sep 21, 2022 at 12:00:38PM +0530, Iddamsetty, Aravind wrote:
>> replying here for earlier comments too.
>>
>> On 21-09-2022 01:35, Lucas De Marchi wrote:
>>> On Tue, Sep 20, 2022 at 01:31:49AM -0700, Lucas De Marchi wrote:
>>>> On Tue, Sep 20, 2022 at 12:49:40PM +0530, Aravind Iddamsetty wrote:
>>>>> As an integrated GPU, MTL does not have local memory and
>>>>> HAS_LMEM() returns false.  However the platform's stolen memory
>>>>> is presented via BAR2 (i.e., the BAR we traditionally consider
>>>>> to be the LMEM BAR) and should be managed by the driver the same
>>>>> way that local memory is on dgpu platforms (which includes
>>>>> setting the "lmem" bit on page table entries).  We use the term
>>>>> "local stolen memory" to refer to this model.
>>>>>
>>>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>>>
>>>>> Signed-off-by: CQ Tang <cq.tang@intel.com>
>>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>>>> Original-author: CQ Tang
>>>>> ---
>>>>> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 113 +++++++++++++++++----
>>>>> drivers/gpu/drm/i915/gt/intel_ggtt.c       |   2 +-
>>>>> drivers/gpu/drm/i915/i915_drv.h            |   3 +
>>>>> 3 files changed, 100 insertions(+), 18 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>>> index acc561c0f0aa..bad5250fb764 100644
>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>>>> @@ -77,6 +77,19 @@ void i915_gem_stolen_remove_node(struct
>>>>> drm_i915_private *i915,
>>>>>     mutex_unlock(&i915->mm.stolen_lock);
>>>>> }
>>>>>
>>>>> +static bool is_dsm_invalid(struct drm_i915_private *i915, struct
>>>>> resource *dsm)
>>>>> +{
>>>>> +    if (!HAS_BAR2_SMEM_STOLEN(i915)) {
>>>>
>>>> I called a similar function as is_dsm_valid() in
>>>> https://patchwork.freedesktop.org/series/108620/
>>>>
>>>> sounds weird  with "invalid" and the double negation on return early
>>>> style.
>>
>> sure, will change it hope i can use that from your patch.
> 
> that patch is now pushed, so now you can reuse it.
Thanks for the info and help.

Aravind.
> 
> Lucas De Marchi
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index acc561c0f0aa..bad5250fb764 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,6 +77,19 @@  void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
 	mutex_unlock(&i915->mm.stolen_lock);
 }
 
+static bool is_dsm_invalid(struct drm_i915_private *i915, struct resource *dsm)
+{
+	if (!HAS_BAR2_SMEM_STOLEN(i915)) {
+		if (dsm->start == 0)
+			return true;
+	}
+
+	if (dsm->end <= dsm->start)
+		return true;
+
+	return false;
+}
+
 static int i915_adjust_stolen(struct drm_i915_private *i915,
 			      struct resource *dsm)
 {
@@ -84,7 +97,7 @@  static int i915_adjust_stolen(struct drm_i915_private *i915,
 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 	struct resource *r;
 
-	if (dsm->start == 0 || dsm->end <= dsm->start)
+	if (is_dsm_invalid(i915, dsm))
 		return -EINVAL;
 
 	/*
@@ -136,7 +149,7 @@  static int i915_adjust_stolen(struct drm_i915_private *i915,
 	 * overlaps with the non-stolen system memory range, since lmem is local
 	 * to the gpu.
 	 */
-	if (HAS_LMEM(i915))
+	if (HAS_LMEM(i915) || HAS_BAR2_SMEM_STOLEN(i915))
 		return 0;
 
 	/*
@@ -371,8 +384,6 @@  static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
 	case GEN8_STOLEN_RESERVED_1M:
 		*size = 1024 * 1024;
@@ -390,6 +401,12 @@  static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 		*size = 8 * 1024 * 1024;
 		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
 	}
+
+	if ((GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) && !IS_DGFX(i915))
+		/* the base is initialized to stolen top so subtract size to get base */
+		*base -= *size;
+	else
+		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
 
 static int i915_gem_init_stolen(struct intel_memory_region *mem)
@@ -423,8 +440,7 @@  static int i915_gem_init_stolen(struct intel_memory_region *mem)
 	if (i915_adjust_stolen(i915, &i915->dsm))
 		return 0;
 
-	GEM_BUG_ON(i915->dsm.start == 0);
-	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+	GEM_BUG_ON(is_dsm_invalid(i915, &i915->dsm));
 
 	stolen_top = i915->dsm.end + 1;
 	reserved_base = stolen_top;
@@ -796,6 +812,46 @@  static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
 	.init_object = _i915_gem_object_stolen_init,
 };
 
+static int get_mtl_gms_size(struct intel_uncore *uncore)
+{
+	u16 ggc, gms;
+
+	ggc = intel_uncore_read16(uncore, _MMIO(0x108040));
+
+	/* check GGMS, should be fixed 0x3 (8MB) */
+	if ((ggc & 0xc0) != 0xc0)
+		return -EIO;
+
+	/* return valid GMS value, -EIO if invalid */
+	gms = ggc >> 8;
+	switch (gms) {
+	case 0x0 ... 0x10:
+		return gms * 32;
+	case 0x20:
+		return 1024;
+	case 0x30:
+		return 1536;
+	case 0x40:
+		return 2048;
+	case 0xf0 ... 0xfe:
+		return (gms - 0xf0 + 1) * 4;
+	default:
+		return -EIO;
+	}
+}
+
+static inline bool lmembar_is_igpu_stolen(struct drm_i915_private *i915)
+{
+	u32 regions = RUNTIME_INFO(i915)->memory_regions;
+
+	if (regions & REGION_LMEM)
+		return false;
+
+	drm_WARN_ON(&i915->drm, (regions & REGION_STOLEN_LMEM) == 0);
+
+	return true;
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 			   u16 instance)
@@ -806,19 +862,16 @@  i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	struct intel_memory_region *mem;
 	resource_size_t io_start, io_size;
 	resource_size_t min_page_size;
+	int ret;
 
 	if (WARN_ON_ONCE(instance))
 		return ERR_PTR(-ENODEV);
 
-	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
+	if (!i915_pci_resource_valid(pdev, GFXMEM_BAR))
 		return ERR_PTR(-ENXIO);
 
-	/* Use DSM base address instead for stolen memory */
-	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
-	if (IS_DG1(uncore->i915)) {
-		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-		if (WARN_ON(lmem_size < dsm_base))
-			return ERR_PTR(-ENODEV);
+	if (lmembar_is_igpu_stolen(i915) || IS_DG1(i915)) {
+		lmem_size = pci_resource_len(pdev, GFXMEM_BAR);
 	} else {
 		resource_size_t lmem_range;
 
@@ -827,13 +880,39 @@  i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 		lmem_size *= SZ_1G;
 	}
 
-	dsm_size = lmem_size - dsm_base;
-	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+	if (HAS_BAR2_SMEM_STOLEN(i915)) {
+		/*
+		 * MTL dsm size is in GGC register, not the bar size.
+		 * also MTL uses offset to DSMBASE in ptes, so i915
+		 * uses dsm_base = 0 to setup stolen region.
+		 */
+		ret = get_mtl_gms_size(uncore);
+		if (ret < 0) {
+			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
+			return ERR_PTR(ret);
+		}
+
+		dsm_base = 0;
+		dsm_size = (resource_size_t)(ret * SZ_1M);
+
+		GEM_BUG_ON(pci_resource_len(pdev, GFXMEM_BAR) != 256 * SZ_1M);
+		GEM_BUG_ON((dsm_size + 8 * SZ_1M) > lmem_size);
+	} else {
+		/* Use DSM base address instead for stolen memory */
+		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
+		if (WARN_ON(lmem_size < dsm_base))
+			return ERR_PTR(-ENODEV);
+		dsm_size = lmem_size - dsm_base;
+	}
+
+	io_size = dsm_size;
+	if (pci_resource_len(pdev, GFXMEM_BAR) < dsm_size) {
 		io_start = 0;
 		io_size = 0;
+	} else if (HAS_BAR2_SMEM_STOLEN(i915)) {
+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + 8 * SZ_1M;
 	} else {
-		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
-		io_size = dsm_size;
+		io_start = pci_resource_start(pdev, GFXMEM_BAR) + dsm_base;
 	}
 
 	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 30cf5c3369d9..b31fe0fb013f 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -931,7 +931,7 @@  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	unsigned int size;
 	u16 snb_gmch_ctl;
 
-	if (!HAS_LMEM(i915)) {
+	if (!HAS_LMEM(i915) && !HAS_BAR2_SMEM_STOLEN(i915)) {
 		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
 			return -ENXIO;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 134fc1621821..ef3120322077 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -973,6 +973,9 @@  IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
 
+#define HAS_BAR2_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
+				    GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+
 /* intel_device_info.c */
 static inline struct intel_device_info *
 mkwrite_device_info(struct drm_i915_private *dev_priv)