diff mbox series

[v3,10/14] drm/i915/uncore: Add GSI offset to uncore

Message ID 20220906234934.3655440-11-matthew.d.roper@intel.com (mailing list archive)
State New, archived
Headers show
Series i915: Add "standalone media" support for MTL | expand

Commit Message

Matt Roper Sept. 6, 2022, 11:49 p.m. UTC
GT non-engine registers (referred to as "GSI" registers by the spec)
have the same relative offsets on standalone media as they do on the
primary GT, just with an additional "GSI offset" added to their MMIO
address.  If we store this GSI offset in the standalone media's
intel_uncore structure, it can be automatically applied to all GSI reg
reads/writes that happen on that GT, allowing us to re-use our existing
GT code with minimal changes.

Forcewake and shadowed register tables for the media GT (which will be
added in a future patch) are listed as final addresses that already
include the GSI offset, so we also need to add the GSI offset before
doing lookups of registers in one of those tables.

Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  1 +
 drivers/gpu/drm/i915/intel_uncore.c      | 10 ++++++++--
 drivers/gpu/drm/i915/intel_uncore.h      | 22 ++++++++++++++++++++--
 3 files changed, 29 insertions(+), 4 deletions(-)

Comments

Daniele Ceraolo Spurio Sept. 8, 2022, 9:16 p.m. UTC | #1
On 9/6/2022 4:49 PM, Matt Roper wrote:
> GT non-engine registers (referred to as "GSI" registers by the spec)
> have the same relative offsets on standalone media as they do on the
> primary GT, just with an additional "GSI offset" added to their MMIO
> address.  If we store this GSI offset in the standalone media's
> intel_uncore structure, it can be automatically applied to all GSI reg
> reads/writes that happen on that GT, allowing us to re-use our existing
> GT code with minimal changes.
>
> Forcewake and shadowed register tables for the media GT (which will be
> added in a future patch) are listed as final addresses that already
> include the GSI offset, so we also need to add the GSI offset before
> doing lookups of registers in one of those tables.
>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  1 +
>   drivers/gpu/drm/i915/intel_uncore.c      | 10 ++++++++--
>   drivers/gpu/drm/i915/intel_uncore.h      | 22 ++++++++++++++++++++--
>   3 files changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 0e139f7d75ed..82dc28643572 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -274,6 +274,7 @@ struct intel_gt_definition {
>   	enum intel_gt_type type;
>   	char *name;
>   	u32 mapping_base;
> +	u32 gsi_offset;
>   	intel_engine_mask_t engine_mask;
>   };
>   
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 452b3a31e965..5cd423c7b646 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -928,6 +928,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
>   {
>   	const struct intel_forcewake_range *entry;
>   
> +	if (IS_GSI_REG(offset))
> +		offset += uncore->gsi_offset;
> +
>   	entry = BSEARCH(offset,
>   			uncore->fw_domains_table,
>   			uncore->fw_domains_table_entries,
> @@ -1143,6 +1146,9 @@ static bool is_shadowed(struct intel_uncore *uncore, u32 offset)
>   	if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table))
>   		return false;
>   
> +	if (IS_GSI_REG(offset))
> +		offset += uncore->gsi_offset;
> +
>   	return BSEARCH(offset,
>   		       uncore->shadowed_reg_table,
>   		       uncore->shadowed_reg_table_entries,
> @@ -1995,8 +2001,8 @@ static int __fw_domain_init(struct intel_uncore *uncore,
>   
>   	d->uncore = uncore;
>   	d->wake_count = 0;
> -	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
> -	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
> +	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + uncore->gsi_offset;
> +	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + uncore->gsi_offset;
>   
>   	d->id = domain_id;
>   
> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> index 4acb78a03233..7f1d7903a8f3 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.h
> +++ b/drivers/gpu/drm/i915/intel_uncore.h
> @@ -136,6 +136,16 @@ struct intel_uncore {
>   
>   	spinlock_t lock; /** lock is also taken in irq contexts. */
>   
> +	/*
> +	 * Do we need to apply an additional offset to reach the beginning
> +	 * of the basic non-engine GT registers (referred to as "GSI" on
> +	 * newer platforms, or "GT block" on older platforms)?  If so, we'll
> +	 * track that here and apply it transparently to registers in the
> +	 * appropriate range to maintain compatibility with our existing
> +	 * register definitions and GT code.
> +	 */
> +	u32 gsi_offset;
> +
>   	unsigned int flags;
>   #define UNCORE_HAS_FORCEWAKE		BIT(0)
>   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
> @@ -294,19 +304,27 @@ intel_wait_for_register_fw(struct intel_uncore *uncore,
>   					    2, timeout_ms, NULL);
>   }
>   
> +#define IS_GSI_REG(reg) ((reg) < 0x40000)
> +
>   /* register access functions */
>   #define __raw_read(x__, s__) \
>   static inline u##x__ __raw_uncore_read##x__(const struct intel_uncore *uncore, \
>   					    i915_reg_t reg) \
>   { \
> -	return read##s__(uncore->regs + i915_mmio_reg_offset(reg)); \
> +	u32 offset = i915_mmio_reg_offset(reg); \
> +	if (IS_GSI_REG(offset)) \
> +		offset += uncore->gsi_offset; \
> +	return read##s__(uncore->regs + offset); \
>   }
>   
>   #define __raw_write(x__, s__) \
>   static inline void __raw_uncore_write##x__(const struct intel_uncore *uncore, \
>   					   i915_reg_t reg, u##x__ val) \
>   { \
> -	write##s__(val, uncore->regs + i915_mmio_reg_offset(reg)); \
> +	u32 offset = i915_mmio_reg_offset(reg); \
> +	if (IS_GSI_REG(offset)) \
> +		offset += uncore->gsi_offset; \
> +	write##s__(val, uncore->regs + offset); \
>   }
>   __raw_read(8, b)
>   __raw_read(16, w)


Do we also need to update the raw_reg_<read/write> macros at the bottom 
of this file? Those seem to currently only be used for irq regs (which 
are outside the GSI range), so not strictly required, but if we decide 
to not update them we should at least add a comment.

I'm also not sure why those macros exist to begin with as they're 
basically equivalent to __raw_uncore_<read/write>32, with the only 
difference being that uncore->regs is de-referenced outside the macro.

Daniele
Matt Roper Sept. 8, 2022, 10:29 p.m. UTC | #2
On Thu, Sep 08, 2022 at 02:16:27PM -0700, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 9/6/2022 4:49 PM, Matt Roper wrote:
> > GT non-engine registers (referred to as "GSI" registers by the spec)
> > have the same relative offsets on standalone media as they do on the
> > primary GT, just with an additional "GSI offset" added to their MMIO
> > address.  If we store this GSI offset in the standalone media's
> > intel_uncore structure, it can be automatically applied to all GSI reg
> > reads/writes that happen on that GT, allowing us to re-use our existing
> > GT code with minimal changes.
> > 
> > Forcewake and shadowed register tables for the media GT (which will be
> > added in a future patch) are listed as final addresses that already
> > include the GSI offset, so we also need to add the GSI offset before
> > doing lookups of registers in one of those tables.
> > 
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_gt_types.h |  1 +
> >   drivers/gpu/drm/i915/intel_uncore.c      | 10 ++++++++--
> >   drivers/gpu/drm/i915/intel_uncore.h      | 22 ++++++++++++++++++++--
> >   3 files changed, 29 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > index 0e139f7d75ed..82dc28643572 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > @@ -274,6 +274,7 @@ struct intel_gt_definition {
> >   	enum intel_gt_type type;
> >   	char *name;
> >   	u32 mapping_base;
> > +	u32 gsi_offset;
> >   	intel_engine_mask_t engine_mask;
> >   };
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> > index 452b3a31e965..5cd423c7b646 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > @@ -928,6 +928,9 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
> >   {
> >   	const struct intel_forcewake_range *entry;
> > +	if (IS_GSI_REG(offset))
> > +		offset += uncore->gsi_offset;
> > +
> >   	entry = BSEARCH(offset,
> >   			uncore->fw_domains_table,
> >   			uncore->fw_domains_table_entries,
> > @@ -1143,6 +1146,9 @@ static bool is_shadowed(struct intel_uncore *uncore, u32 offset)
> >   	if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table))
> >   		return false;
> > +	if (IS_GSI_REG(offset))
> > +		offset += uncore->gsi_offset;
> > +
> >   	return BSEARCH(offset,
> >   		       uncore->shadowed_reg_table,
> >   		       uncore->shadowed_reg_table_entries,
> > @@ -1995,8 +2001,8 @@ static int __fw_domain_init(struct intel_uncore *uncore,
> >   	d->uncore = uncore;
> >   	d->wake_count = 0;
> > -	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
> > -	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
> > +	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + uncore->gsi_offset;
> > +	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + uncore->gsi_offset;
> >   	d->id = domain_id;
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> > index 4acb78a03233..7f1d7903a8f3 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.h
> > +++ b/drivers/gpu/drm/i915/intel_uncore.h
> > @@ -136,6 +136,16 @@ struct intel_uncore {
> >   	spinlock_t lock; /** lock is also taken in irq contexts. */
> > +	/*
> > +	 * Do we need to apply an additional offset to reach the beginning
> > +	 * of the basic non-engine GT registers (referred to as "GSI" on
> > +	 * newer platforms, or "GT block" on older platforms)?  If so, we'll
> > +	 * track that here and apply it transparently to registers in the
> > +	 * appropriate range to maintain compatibility with our existing
> > +	 * register definitions and GT code.
> > +	 */
> > +	u32 gsi_offset;
> > +
> >   	unsigned int flags;
> >   #define UNCORE_HAS_FORCEWAKE		BIT(0)
> >   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
> > @@ -294,19 +304,27 @@ intel_wait_for_register_fw(struct intel_uncore *uncore,
> >   					    2, timeout_ms, NULL);
> >   }
> > +#define IS_GSI_REG(reg) ((reg) < 0x40000)
> > +
> >   /* register access functions */
> >   #define __raw_read(x__, s__) \
> >   static inline u##x__ __raw_uncore_read##x__(const struct intel_uncore *uncore, \
> >   					    i915_reg_t reg) \
> >   { \
> > -	return read##s__(uncore->regs + i915_mmio_reg_offset(reg)); \
> > +	u32 offset = i915_mmio_reg_offset(reg); \
> > +	if (IS_GSI_REG(offset)) \
> > +		offset += uncore->gsi_offset; \
> > +	return read##s__(uncore->regs + offset); \
> >   }
> >   #define __raw_write(x__, s__) \
> >   static inline void __raw_uncore_write##x__(const struct intel_uncore *uncore, \
> >   					   i915_reg_t reg, u##x__ val) \
> >   { \
> > -	write##s__(val, uncore->regs + i915_mmio_reg_offset(reg)); \
> > +	u32 offset = i915_mmio_reg_offset(reg); \
> > +	if (IS_GSI_REG(offset)) \
> > +		offset += uncore->gsi_offset; \
> > +	write##s__(val, uncore->regs + offset); \
> >   }
> >   __raw_read(8, b)
> >   __raw_read(16, w)
> 
> 
> Do we also need to update the raw_reg_<read/write> macros at the bottom of
> this file? Those seem to currently only be used for irq regs (which are
> outside the GSI range), so not strictly required, but if we decide to not
> update them we should at least add a comment.
> 
> I'm also not sure why those macros exist to begin with as they're basically
> equivalent to __raw_uncore_<read/write>32, with the only difference being
> that uncore->regs is de-referenced outside the macro.

Good point; I forgot those existed.  According to git history, the original
justification when they were introduced was that the compiler wasn't caching
uncore->regs inside a register, so it resulted in unnecessary extra load
instructions in the interrupt handlers.  I'm not sure how important that
micro-optimization is, but I replaced them all as an experiment to see if the
claim still holds true with recent compilers and it seems it does:

	$ ./scripts/bloat-o-meter i915.ko.{orig,new}
	add/remove: 0/0 grow/shrink: 8/3 up/down: 275/-121 (154)
	Function                                     old     new   delta
	gen11_gt_engine_identity                     210     280     +70
	dg1_irq_handler                              279     329     +50
	gen11_irq_handler                            208     247     +39
	gen8_gt_irq_handler                          363     392     +29
	intel_irq_reset                             1225    1252     +27
	gen11_gt_reset_one_iir                       143     170     +27
	gen8_irq_handler                             166     185     +19
	gen11_display_irq_handler                     69      83     +14
	ilk_irq_handler.cold                          72      63      -9
	gen11_gt_irq_handler                         756     736     -20
	ilk_irq_handler                             2663    2571     -92
	Total: Before=2481127, After=2481281, chg +0.01%

So given that these macros are only used in places where we don't expect
to be operating on GSI registers, and the goal was to avoid unnecessary
instructions, I think adding a comment is probably the way to go.  If
there's ever a need to use these on GSI registers in the future, then it
will be the caller's responsibility to take care of adding the GSI
offset to the 'base' parameter.

I'll send an updated version that includes a comment.


Matt

> 
> Daniele
> 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 0e139f7d75ed..82dc28643572 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -274,6 +274,7 @@  struct intel_gt_definition {
 	enum intel_gt_type type;
 	char *name;
 	u32 mapping_base;
+	u32 gsi_offset;
 	intel_engine_mask_t engine_mask;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 452b3a31e965..5cd423c7b646 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -928,6 +928,9 @@  find_fw_domain(struct intel_uncore *uncore, u32 offset)
 {
 	const struct intel_forcewake_range *entry;
 
+	if (IS_GSI_REG(offset))
+		offset += uncore->gsi_offset;
+
 	entry = BSEARCH(offset,
 			uncore->fw_domains_table,
 			uncore->fw_domains_table_entries,
@@ -1143,6 +1146,9 @@  static bool is_shadowed(struct intel_uncore *uncore, u32 offset)
 	if (drm_WARN_ON(&uncore->i915->drm, !uncore->shadowed_reg_table))
 		return false;
 
+	if (IS_GSI_REG(offset))
+		offset += uncore->gsi_offset;
+
 	return BSEARCH(offset,
 		       uncore->shadowed_reg_table,
 		       uncore->shadowed_reg_table_entries,
@@ -1995,8 +2001,8 @@  static int __fw_domain_init(struct intel_uncore *uncore,
 
 	d->uncore = uncore;
 	d->wake_count = 0;
-	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set);
-	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack);
+	d->reg_set = uncore->regs + i915_mmio_reg_offset(reg_set) + uncore->gsi_offset;
+	d->reg_ack = uncore->regs + i915_mmio_reg_offset(reg_ack) + uncore->gsi_offset;
 
 	d->id = domain_id;
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 4acb78a03233..7f1d7903a8f3 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -136,6 +136,16 @@  struct intel_uncore {
 
 	spinlock_t lock; /** lock is also taken in irq contexts. */
 
+	/*
+	 * Do we need to apply an additional offset to reach the beginning
+	 * of the basic non-engine GT registers (referred to as "GSI" on
+	 * newer platforms, or "GT block" on older platforms)?  If so, we'll
+	 * track that here and apply it transparently to registers in the
+	 * appropriate range to maintain compatibility with our existing
+	 * register definitions and GT code.
+	 */
+	u32 gsi_offset;
+
 	unsigned int flags;
 #define UNCORE_HAS_FORCEWAKE		BIT(0)
 #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
@@ -294,19 +304,27 @@  intel_wait_for_register_fw(struct intel_uncore *uncore,
 					    2, timeout_ms, NULL);
 }
 
+#define IS_GSI_REG(reg) ((reg) < 0x40000)
+
 /* register access functions */
 #define __raw_read(x__, s__) \
 static inline u##x__ __raw_uncore_read##x__(const struct intel_uncore *uncore, \
 					    i915_reg_t reg) \
 { \
-	return read##s__(uncore->regs + i915_mmio_reg_offset(reg)); \
+	u32 offset = i915_mmio_reg_offset(reg); \
+	if (IS_GSI_REG(offset)) \
+		offset += uncore->gsi_offset; \
+	return read##s__(uncore->regs + offset); \
 }
 
 #define __raw_write(x__, s__) \
 static inline void __raw_uncore_write##x__(const struct intel_uncore *uncore, \
 					   i915_reg_t reg, u##x__ val) \
 { \
-	write##s__(val, uncore->regs + i915_mmio_reg_offset(reg)); \
+	u32 offset = i915_mmio_reg_offset(reg); \
+	if (IS_GSI_REG(offset)) \
+		offset += uncore->gsi_offset; \
+	write##s__(val, uncore->regs + offset); \
 }
 __raw_read(8, b)
 __raw_read(16, w)