diff mbox series

[2/2] drm/v3d: Fix Indirect Dispatch configuration for V3D 7.1.6 and later

Message ID 20240714145243.1223131-2-mcanal@igalia.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/v3d: Add V3D tech revision to the device information | expand

Commit Message

Maíra Canal July 14, 2024, 2:49 p.m. UTC
`args->cfg[4]` is configured in Indirect Dispatch using the number of
batches. Currently, for all V3D tech versions, `args->cfg[4]` is set to the
number of batches minus 1. However, for V3D 7.1.6 and later, we must not
subtract 1 from the number of batches.

Implement the fix by checking the V3D tech version and revision.

Fixes several `dEQP-VK.synchronization*` CTS tests related to Indirect Dispatch.

Fixes: 18b8413b25b7 ("drm/v3d: Create a CPU job extension for a indirect CSD job")
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_sched.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

Comments

Iago Toral July 15, 2024, 5:49 a.m. UTC | #1
Thanks Maíra, both patches are:
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>

El dom, 14-07-2024 a las 11:49 -0300, Maíra Canal escribió:
> `args->cfg[4]` is configured in Indirect Dispatch using the number of
> batches. Currently, for all V3D tech versions, `args->cfg[4]` equals
> the
> number of batches subtracted by 1. But, for V3D 7.1.6 and later, we
> must not
> subtract 1 from the number of batches.
> 
> Implement the fix by checking the V3D tech version and revision.
> 
> Fixes several `dEQP-VK.synchronization*` CTS tests related to
> Indirect Dispatch.
> 
> Fixes: 18b8413b25b7 ("drm/v3d: Create a CPU job extension for a
> indirect CSD job")
> Signed-off-by: Maíra Canal <mcanal@igalia.com>
> ---
>  drivers/gpu/drm/v3d/v3d_sched.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
> b/drivers/gpu/drm/v3d/v3d_sched.c
> index d193072703f3..cafa3a298c11 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -353,7 +353,8 @@
> v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
>  	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
>  	struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
>  	struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
> -	u32 *wg_counts;
> +	struct v3d_dev *v3d = job->base.v3d;
> +	u32 num_batches, *wg_counts;
>  
>  	v3d_get_bo_vaddr(bo);
>  	v3d_get_bo_vaddr(indirect);
> @@ -366,8 +367,17 @@
> v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
>  	args->cfg[0] = wg_counts[0] <<
> V3D_CSD_CFG012_WG_COUNT_SHIFT;
>  	args->cfg[1] = wg_counts[1] <<
> V3D_CSD_CFG012_WG_COUNT_SHIFT;
>  	args->cfg[2] = wg_counts[2] <<
> V3D_CSD_CFG012_WG_COUNT_SHIFT;
> -	args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
> -		       (wg_counts[0] * wg_counts[1] * wg_counts[2])
> - 1;
> +
> +	num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
> +		      (wg_counts[0] * wg_counts[1] * wg_counts[2]);
> +
> +	/* V3D 7.1.6 and later don't subtract 1 from the number of
> batches */
> +	if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6))
> +		args->cfg[4] = num_batches - 1;
> +	else
> +		args->cfg[4] = num_batches;
> +
> +	WARN_ON(args->cfg[4] == ~0);
>  
>  	for (int i = 0; i < 3; i++) {
>  		/* 0xffffffff indicates that the uniform rewrite is
> not needed */
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index d193072703f3..cafa3a298c11 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -353,7 +353,8 @@  v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
 	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 	struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
 	struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
-	u32 *wg_counts;
+	struct v3d_dev *v3d = job->base.v3d;
+	u32 num_batches, *wg_counts;
 
 	v3d_get_bo_vaddr(bo);
 	v3d_get_bo_vaddr(indirect);
@@ -366,8 +367,17 @@  v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
 	args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
 	args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
 	args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
-	args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
-		       (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
+
+	num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
+		      (wg_counts[0] * wg_counts[1] * wg_counts[2]);
+
+	/* V3D 7.1.6 and later don't subtract 1 from the number of batches */
+	if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6))
+		args->cfg[4] = num_batches - 1;
+	else
+		args->cfg[4] = num_batches;
+
+	WARN_ON(args->cfg[4] == ~0);
 
 	for (int i = 0; i < 3; i++) {
 		/* 0xffffffff indicates that the uniform rewrite is not needed */