diff mbox series

dma-buf: Take a breath during dma-fence-chain subtests

Message ID 20250318103408.3566010-1-nitin.r.gote@intel.com (mailing list archive)
State New
Headers show
Series dma-buf: Take a breath during dma-fence-chain subtests | expand

Commit Message

Gote, Nitin R March 18, 2025, 10:34 a.m. UTC
Give the scheduler a chance to breathe by calling cond_resched()
as some of the loops may take some time on old machines (apl/bsw/pnv), and
so catch the attention of the watchdogs.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12904
Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
---
Cc: Christian Konig <christian.koenig@amd.com>

Hi Konig,

This is not a functional issue in test. We wish to prevent softlock and allow the
dma-fence-chain test run to completion to verify it's functional correctness.

The performance issue takes about 5ms for the dma-fence-chain to be signalled on
older hardware which is an orthogonal issue and to be debugged separately for
which the test has to run to completion.

So, reverting to cond_resched() which fixes the issue instead of
delay functions.

- Nitin

 drivers/dma-buf/st-dma-fence-chain.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

Comments

Christian König March 18, 2025, 10:37 a.m. UTC | #1
Am 18.03.25 um 11:34 schrieb Nitin Gote:
> Give the scheduler a chance to breathe by calling cond_resched()
> as some of the loops may take some time on old machines (apl/bsw/pnv), and
> so catch the attention of the watchdogs.
>
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12904
> Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
> ---
> Cc: Christian Konig <christian.koenig@amd.com>
>
> Hi Konig,
>
> This is not a functional issue in test.

Exactly that doesn't sounds correct to me.

> We wish to prevent softlock and allow the
> dma-fence-chain test run to completion to verify it's functional correctness.
>
> The performance issue takes about 5ms for the dma-fence-chain to be signalled on
> older hardware which is an orthogonal issue and to be debugged separately for
> which the test has to run to completion.
>
> So, reverting to cond_resched() which fixes the issue instead of
> delay functions.

See what the warning is all about is that the CPU *busy* on something for more than 26seconds. Even on older hardware that should absolutely not happen.

So what we either have is a bug in the test case (perfectly possible) or we have a bug in the dma-fence-chain implementation.

As long as you don't come with a good explanation why the test case keeps the CPU busy for that long this patch here is a clear NAK from my side.

Regards,
Christian.

>
> - Nitin
>
>  drivers/dma-buf/st-dma-fence-chain.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> index ed4b323886e4..328a66ed59e5 100644
> --- a/drivers/dma-buf/st-dma-fence-chain.c
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -505,6 +505,7 @@ static int signal_forward(void *arg)
>  
>  	for (i = 0; i < fc.chain_length; i++) {
>  		dma_fence_signal(fc.fences[i]);
> +		cond_resched();
>  
>  		if (!dma_fence_is_signaled(fc.chains[i])) {
>  			pr_err("chain[%d] not signaled!\n", i);
> @@ -537,6 +538,7 @@ static int signal_backward(void *arg)
>  
>  	for (i = fc.chain_length; i--; ) {
>  		dma_fence_signal(fc.fences[i]);
> +		cond_resched();
>  
>  		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
>  			pr_err("chain[%d] is signaled!\n", i);
> @@ -587,8 +589,10 @@ static int wait_forward(void *arg)
>  	get_task_struct(tsk);
>  	yield_to(tsk, true);
>  
> -	for (i = 0; i < fc.chain_length; i++)
> +	for (i = 0; i < fc.chain_length; i++) {
>  		dma_fence_signal(fc.fences[i]);
> +		cond_resched();
> +	}
>  
>  	err = kthread_stop_put(tsk);
>  
> @@ -616,8 +620,10 @@ static int wait_backward(void *arg)
>  	get_task_struct(tsk);
>  	yield_to(tsk, true);
>  
> -	for (i = fc.chain_length; i--; )
> +	for (i = fc.chain_length; i--; ) {
>  		dma_fence_signal(fc.fences[i]);
> +		cond_resched();
> +	}
>  
>  	err = kthread_stop_put(tsk);
>  
> @@ -663,8 +669,10 @@ static int wait_random(void *arg)
>  	get_task_struct(tsk);
>  	yield_to(tsk, true);
>  
> -	for (i = 0; i < fc.chain_length; i++)
> +	for (i = 0; i < fc.chain_length; i++) {
>  		dma_fence_signal(fc.fences[i]);
> +		cond_resched();
> +	}
>  
>  	err = kthread_stop_put(tsk);
>
diff mbox series

Patch

diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index ed4b323886e4..328a66ed59e5 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -505,6 +505,7 @@  static int signal_forward(void *arg)
 
 	for (i = 0; i < fc.chain_length; i++) {
 		dma_fence_signal(fc.fences[i]);
+		cond_resched();
 
 		if (!dma_fence_is_signaled(fc.chains[i])) {
 			pr_err("chain[%d] not signaled!\n", i);
@@ -537,6 +538,7 @@  static int signal_backward(void *arg)
 
 	for (i = fc.chain_length; i--; ) {
 		dma_fence_signal(fc.fences[i]);
+		cond_resched();
 
 		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
 			pr_err("chain[%d] is signaled!\n", i);
@@ -587,8 +589,10 @@  static int wait_forward(void *arg)
 	get_task_struct(tsk);
 	yield_to(tsk, true);
 
-	for (i = 0; i < fc.chain_length; i++)
+	for (i = 0; i < fc.chain_length; i++) {
 		dma_fence_signal(fc.fences[i]);
+		cond_resched();
+	}
 
 	err = kthread_stop_put(tsk);
 
@@ -616,8 +620,10 @@  static int wait_backward(void *arg)
 	get_task_struct(tsk);
 	yield_to(tsk, true);
 
-	for (i = fc.chain_length; i--; )
+	for (i = fc.chain_length; i--; ) {
 		dma_fence_signal(fc.fences[i]);
+		cond_resched();
+	}
 
 	err = kthread_stop_put(tsk);
 
@@ -663,8 +669,10 @@  static int wait_random(void *arg)
 	get_task_struct(tsk);
 	yield_to(tsk, true);
 
-	for (i = 0; i < fc.chain_length; i++)
+	for (i = 0; i < fc.chain_length; i++) {
 		dma_fence_signal(fc.fences[i]);
+		cond_resched();
+	}
 
 	err = kthread_stop_put(tsk);