diff mbox series

[RFC] drm/panfrost: Handle IDVS_GROUP_SIZE feature

Message ID 20220109171254.3183-1-alyssa.rosenzweig@collabora.com (mailing list archive)
State New, archived
Headers show
Series [RFC] drm/panfrost: Handle IDVS_GROUP_SIZE feature | expand

Commit Message

Alyssa Rosenzweig Jan. 9, 2022, 5:12 p.m. UTC
The IDVS group size feature was missing. It is used on some Bifrost and
Valhall GPUs, and is the last kernel-relevant Bifrost feature we're
missing.

This feature adds an extra IDVS group size field to the JM_CONFIG
register. In kbase, the value is configurable via the device tree; kbase
uses 0xF as a default if no value is specified. Until we find a device
demanding otherwise, let's always set the 0xF default on devices which
support this feature mimicking kbase's behaviour.

As JM_CONFIG is an undocumented register, it's not clear to me what
happens if we fail to include this handling. Index-driven vertex shading
already works on Bifrost boards with this feature without this handling.
Perhaps this has performance implications? Patch untested for the
moment, wanted to give Steven a chance to comment.

Applies on top of my feature clean up series which should go in first.
(That's pure cleaunp, this is a behaviour change RFC needing
discussion.)

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_features.h | 3 +++
 drivers/gpu/drm/panfrost/panfrost_gpu.c      | 3 +++
 drivers/gpu/drm/panfrost/panfrost_regs.h     | 1 +
 3 files changed, 7 insertions(+)

Comments

Alyssa Rosenzweig Jan. 9, 2022, 5:20 p.m. UTC | #1
kbase dt-bindings say that tasks are sent to cores in groups of N + 1,
where N is the value here. So our old behaviour sends tasks in groups of
1; the new behaviour sends tasks in groups of 16. I assume this has
performance implications but no conformance implications.

Searching GitHub, I can't find any device trees that set
idvs-group-size out of the many random Android forks people have
uploaded, so I don't think this will matter for any production device.
(Was this a workaround for preproduction silicon? or FPGAs? or was this
an option for the sake of having an option?)

On Sun, Jan 09, 2022 at 12:12:54PM -0500, Alyssa Rosenzweig wrote:
> The IDVS group size feature was missing. It is used on some Bifrost and
> Valhall GPUs, and is the last kernel-relevant Bifrost feature we're
> missing.
> 
> This feature adds an extra IDVS group size field to the JM_CONFIG
> register. In kbase, the value is configurable via the device tree; kbase
> uses 0xF as a default if no value is specified. Until we find a device
> demanding otherwise, let's always set the 0xF default on devices which
> support this feature mimicking kbase's behaviour.
> 
> As JM_CONFIG is an undocumented register, it's not clear to me what
> happens if we fail to include this handling. Index-driven vertex shading
> already works on Bifrost boards with this feature without this handling.
> Perhaps this has performance implications? Patch untested for the
> moment, wanted to give Steven a chance to comment.
> 
> Applies on top of my feature clean up series which should go in first.
> (That's pure cleaunp, this is a behaviour change RFC needing
> discussion.)
> 
> Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_features.h | 3 +++
>  drivers/gpu/drm/panfrost/panfrost_gpu.c      | 3 +++
>  drivers/gpu/drm/panfrost/panfrost_regs.h     | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h
> index 34f2bae1ec8c..36fadcf9634e 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_features.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_features.h
> @@ -20,6 +20,7 @@ enum panfrost_hw_feature {
>  	HW_FEATURE_AARCH64_MMU,
>  	HW_FEATURE_TLS_HASHING,
>  	HW_FEATURE_THREAD_GROUP_SPLIT,
> +	HW_FEATURE_IDVS_GROUP_SIZE,
>  	HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
>  };
>  
> @@ -74,6 +75,7 @@ enum panfrost_hw_feature {
>  	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
>  	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
>  	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
> +	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
>  	BIT_ULL(HW_FEATURE_COHERENCY_REG))
>  
>  #define hw_features_g76 (\
> @@ -87,6 +89,7 @@ enum panfrost_hw_feature {
>  	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
>  	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
>  	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
> +	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
>  	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
>  
>  #define hw_features_g31 (\
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> index bbe628b306ee..50c8922694d7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> @@ -145,6 +145,9 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
>  		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
>  			   JM_FORCE_COHERENCY_FEATURES_SHIFT;
>  
> +	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE))
> +		quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << JM_IDVS_GROUP_SIZE_SHIFT;
> +
>  	if (quirks)
>  		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
> index 6c5a11ef1ee8..16e776cc82ea 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_regs.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
> @@ -208,6 +208,7 @@
>  #define JM_MAX_JOB_THROTTLE_LIMIT	0x3F
>  #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2
>  #define JM_IDVS_GROUP_SIZE_SHIFT	16
> +#define JM_DEFAULT_IDVS_GROUP_SIZE	0xF
>  #define JM_MAX_IDVS_GROUP_SIZE		0x3F
>  
>  
> -- 
> 2.34.1
>
Steven Price Jan. 10, 2022, 5:10 p.m. UTC | #2
On 09/01/2022 17:12, Alyssa Rosenzweig wrote:
> The IDVS group size feature was missing. It is used on some Bifrost and
> Valhall GPUs, and is the last kernel-relevant Bifrost feature we're
> missing.
> 
> This feature adds an extra IDVS group size field to the JM_CONFIG
> register. In kbase, the value is configurable via the device tree; kbase
> uses 0xF as a default if no value is specified. Until we find a device
> demanding otherwise, let's always set the 0xF default on devices which
> support this feature mimicking kbase's behaviour.

This is a performance thing - so I don't think it will break anything if
this is wrong, it just won't be optimal.

> As JM_CONFIG is an undocumented register, it's not clear to me what
> happens if we fail to include this handling. Index-driven vertex shading
> already works on Bifrost boards with this feature without this handling.
> Perhaps this has performance implications? Patch untested for the
> moment, wanted to give Steven a chance to comment.

As it's a performance thing you shouldn't see correctness issues with
not setting it. But 0xF seems to have been chosen as it gave the best
overall performance (although for individual test content this can
vary). AFAICT the performance impact isn't massive either.

> Applies on top of my feature clean up series which should go in first.
> (That's pure cleaunp, this is a behaviour change RFC needing
> discussion.)
> 
> Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

Reviewed-by: Steven Price <steven.price@arm.com>

Since you've tagged this RFC I won't merge it now, but it looks correct
to me.

Thanks,

Steve

> ---
>  drivers/gpu/drm/panfrost/panfrost_features.h | 3 +++
>  drivers/gpu/drm/panfrost/panfrost_gpu.c      | 3 +++
>  drivers/gpu/drm/panfrost/panfrost_regs.h     | 1 +
>  3 files changed, 7 insertions(+)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h
> index 34f2bae1ec8c..36fadcf9634e 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_features.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_features.h
> @@ -20,6 +20,7 @@ enum panfrost_hw_feature {
>  	HW_FEATURE_AARCH64_MMU,
>  	HW_FEATURE_TLS_HASHING,
>  	HW_FEATURE_THREAD_GROUP_SPLIT,
> +	HW_FEATURE_IDVS_GROUP_SIZE,
>  	HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
>  };
>  
> @@ -74,6 +75,7 @@ enum panfrost_hw_feature {
>  	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
>  	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
>  	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
> +	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
>  	BIT_ULL(HW_FEATURE_COHERENCY_REG))
>  
>  #define hw_features_g76 (\
> @@ -87,6 +89,7 @@ enum panfrost_hw_feature {
>  	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
>  	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
>  	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
> +	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
>  	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
>  
>  #define hw_features_g31 (\
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> index bbe628b306ee..50c8922694d7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> @@ -145,6 +145,9 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
>  		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
>  			   JM_FORCE_COHERENCY_FEATURES_SHIFT;
>  
> +	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE))
> +		quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << JM_IDVS_GROUP_SIZE_SHIFT;
> +
>  	if (quirks)
>  		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
> index 6c5a11ef1ee8..16e776cc82ea 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_regs.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
> @@ -208,6 +208,7 @@
>  #define JM_MAX_JOB_THROTTLE_LIMIT	0x3F
>  #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2
>  #define JM_IDVS_GROUP_SIZE_SHIFT	16
> +#define JM_DEFAULT_IDVS_GROUP_SIZE	0xF
>  #define JM_MAX_IDVS_GROUP_SIZE		0x3F
>  
>  
>
Alyssa Rosenzweig Jan. 10, 2022, 5:33 p.m. UTC | #3
> > This feature adds an extra IDVS group size field to the JM_CONFIG
> > register. In kbase, the value is configurable via the device tree; kbase
> > uses 0xF as a default if no value is specified. Until we find a device
> > demanding otherwise, let's always set the 0xF default on devices which
> > support this feature mimicking kbase's behaviour.
> 
> This is a performance thing - so I don't think it will break anything if
> this is wrong, it just won't be optimal.

Then interpret my remarks as hardcoding the default until we find a
device where setting to something other than 0xF improves performance
nontrivially. (Read: I am lazy and do not want to write dt-bindings for
something nobody will ever use.)

> > As JM_CONFIG is an undocumented register, it's not clear to me what
> > happens if we fail to include this handling. Index-driven vertex shading
> > already works on Bifrost boards with this feature without this handling.
> > Perhaps this has performance implications? Patch untested for the
> > moment, wanted to give Steven a chance to comment.
> 
> As it's a performance thing you shouldn't see correctness issues with
> not setting it. But 0xF seems to have been chosen as it gave the best
> overall performance (although for individual test content this can
> vary). AFAICT the performance impact isn't massive either.

Good to know, will update the commit message accordingly.

> Reviewed-by: Steven Price <steven.price@arm.com>
> 
> Since you've tagged this RFC I won't merge it now, but it looks correct
> to me.

Thanks for the review... I hope you like reviewing Panfrost patches
because I have a Valhall bring-up series waiting o:)

When I get a chance to uprev the kernel on my G52 board I'll see if I
can benchmark the impact of this change, so far this is only
compile-tested. Even if there's no impact the patch should likely go in
to stay consistent with kbase, but hopefully there's a win from this. At
that point I'll send a v2 with your reviewed-by (and hopefully no
changes other than the commit message) and we'll land that.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h
index 34f2bae1ec8c..36fadcf9634e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_features.h
+++ b/drivers/gpu/drm/panfrost/panfrost_features.h
@@ -20,6 +20,7 @@  enum panfrost_hw_feature {
 	HW_FEATURE_AARCH64_MMU,
 	HW_FEATURE_TLS_HASHING,
 	HW_FEATURE_THREAD_GROUP_SPLIT,
+	HW_FEATURE_IDVS_GROUP_SIZE,
 	HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
 };
 
@@ -74,6 +75,7 @@  enum panfrost_hw_feature {
 	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
 	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
 	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
 	BIT_ULL(HW_FEATURE_COHERENCY_REG))
 
 #define hw_features_g76 (\
@@ -87,6 +89,7 @@  enum panfrost_hw_feature {
 	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
 	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
 	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
+	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
 	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
 
 #define hw_features_g31 (\
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index bbe628b306ee..50c8922694d7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -145,6 +145,9 @@  static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
 		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
 			   JM_FORCE_COHERENCY_FEATURES_SHIFT;
 
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE))
+		quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << JM_IDVS_GROUP_SIZE_SHIFT;
+
 	if (quirks)
 		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 6c5a11ef1ee8..16e776cc82ea 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -208,6 +208,7 @@ 
 #define JM_MAX_JOB_THROTTLE_LIMIT	0x3F
 #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2
 #define JM_IDVS_GROUP_SIZE_SHIFT	16
+#define JM_DEFAULT_IDVS_GROUP_SIZE	0xF
 #define JM_MAX_IDVS_GROUP_SIZE		0x3F