diff mbox series

[06/10] btrfs: add helper to kill background process running _btrfs_stress_remount_compress

Message ID 0689a969e9834f4c2e694404f41f0bf8b7a34a2f.1711558345.git.fdmanana@suse.com (mailing list archive)
State New, archived
Headers show
Series fstests/btrfs: fixes for tests btrfs/06[0-9] and btrfs/07[0-4] | expand

Commit Message

Filipe Manana March 27, 2024, 5:11 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Killing a background process running _btrfs_stress_remount_compress() is
not as simple as sending a signal to the process and waiting for it to
die. Therefore we have the following logic to terminate such process:

    kill $pid
    wait $pid
    while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
        sleep 1
    done

Since this is repeated in several test cases, move this logic to a common
helper and use it in all affected test cases. This will help to avoid
repeating the same code again several times in upcoming changes.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 common/btrfs    | 15 +++++++++++++++
 tests/btrfs/063 |  7 +------
 tests/btrfs/068 |  8 ++------
 tests/btrfs/071 | 12 +++++-------
 tests/btrfs/073 |  8 +-------
 tests/btrfs/074 |  8 +-------
 6 files changed, 25 insertions(+), 33 deletions(-)

Comments

Anand Jain March 28, 2024, 9:24 a.m. UTC | #1
On 3/28/24 01:11, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> Killing a background process running _btrfs_stress_remount_compress() is
> not as simple as sending a signal to the process and waiting for it to
> die. Therefore we have the following logic to terminate such process:
> 
>      kill $pid
>      wait $pid
>      while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
>          sleep 1
>      done
> 
> Since this is repeated in several test cases, move this logic to a common
> helper and use it in all affected test cases. This will help to avoid
> repeating the same code again several times in upcoming changes.
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>


Thanks, Anand

> ---
>   common/btrfs    | 15 +++++++++++++++
>   tests/btrfs/063 |  7 +------
>   tests/btrfs/068 |  8 ++------
>   tests/btrfs/071 | 12 +++++-------
>   tests/btrfs/073 |  8 +-------
>   tests/btrfs/074 |  8 +-------
>   6 files changed, 25 insertions(+), 33 deletions(-)
> 
> diff --git a/common/btrfs b/common/btrfs
> index 46056d4a..66a3a347 100644
> --- a/common/btrfs
> +++ b/common/btrfs
> @@ -411,6 +411,21 @@ _btrfs_stress_remount_compress()
>   	done
>   }
>   
> +# Kill a background process running _btrfs_stress_remount_compress()
> +_btrfs_kill_stress_remount_compress_pid()
> +{
> +	local remount_pid=$1
> +	local btrfs_mnt=$2
> +
> +	# Ignore if process already died.
> +	kill $remount_pid &> /dev/null
> +	wait $remount_pid &> /dev/null
> +	# Wait for the remount loop to finish.
> +	while ps aux | grep "mount.*${btrfs_mnt}" | grep -qv grep; do
> +		sleep 1
> +	done
> +}
> +
>   # stress btrfs by replacing devices in a loop
>   # Note that at least 3 devices are needed in SCRATCH_DEV_POOL and the last
>   # device should be free(not used by btrfs)
> diff --git a/tests/btrfs/063 b/tests/btrfs/063
> index baf0c356..5ee2837f 100755
> --- a/tests/btrfs/063
> +++ b/tests/btrfs/063
> @@ -52,12 +52,7 @@ run_test()
>   	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
>   	wait $fsstress_pid
>   	_btrfs_kill_stress_balance_pid $balance_pid
> -	kill $remount_pid
> -	wait $remount_pid
> -	# wait for the remount loop to finish
> -	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> -		sleep 1
> -	done
> +	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>   
>   	echo "Scrub the filesystem" >>$seqres.full
>   	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
> diff --git a/tests/btrfs/068 b/tests/btrfs/068
> index 15fd41db..db53254a 100755
> --- a/tests/btrfs/068
> +++ b/tests/btrfs/068
> @@ -58,12 +58,8 @@ run_test()
>   	wait $fsstress_pid
>   
>   	touch $stop_file
> -	kill $remount_pid
> -	wait
> -	# wait for the remount loop process to finish
> -	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> -		sleep 1
> -	done
> +	wait $subvol_pid
> +	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>   
>   	echo "Scrub the filesystem" >>$seqres.full
>   	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
> diff --git a/tests/btrfs/071 b/tests/btrfs/071
> index 6ebbd8cc..7ba15390 100755
> --- a/tests/btrfs/071
> +++ b/tests/btrfs/071
> @@ -58,17 +58,15 @@ run_test()
>   	echo "$remount_pid" >>$seqres.full
>   
>   	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
> -	wait $fsstress_pid
> -	kill $replace_pid $remount_pid
> -	wait
> +	kill $replace_pid
> +	wait $fsstress_pid $replace_pid
>   
> -	# wait for the remount and replace operations to finish
> +	# wait for the replace operationss to finish
>   	while ps aux | grep "replace start" | grep -qv grep; do
>   		sleep 1
>   	done
> -	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> -		sleep 1
> -	done
> +
> +	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>   
>   	echo "Scrub the filesystem" >>$seqres.full
>   	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
> diff --git a/tests/btrfs/073 b/tests/btrfs/073
> index 49a4abd1..50358286 100755
> --- a/tests/btrfs/073
> +++ b/tests/btrfs/073
> @@ -51,13 +51,7 @@ run_test()
>   
>   	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
>   	wait $fsstress_pid
> -	kill $remount_pid
> -	wait $remount_pid
> -	# wait for the remount operation to finish
> -	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> -		sleep 1
> -	done
> -
> +	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>   	_btrfs_kill_stress_scrub_pid $scrub_pid
>   
>   	echo "Scrub the filesystem" >>$seqres.full
> diff --git a/tests/btrfs/074 b/tests/btrfs/074
> index d51922d0..6e93b36a 100755
> --- a/tests/btrfs/074
> +++ b/tests/btrfs/074
> @@ -52,13 +52,7 @@ run_test()
>   
>   	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
>   	wait $fsstress_pid
> -	kill $remount_pid
> -	wait $remount_pid
> -	# wait for the remount operation to finish
> -	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> -		sleep 1
> -	done
> -
> +	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>   	_btrfs_kill_stress_defrag_pid $defrag_pid
>   
>   	echo "Scrub the filesystem" >>$seqres.full
Anand Jain March 28, 2024, 11:46 a.m. UTC | #2
>> diff --git a/tests/btrfs/071 b/tests/btrfs/071
>> index 6ebbd8cc..7ba15390 100755
>> --- a/tests/btrfs/071
>> +++ b/tests/btrfs/071
>> @@ -58,17 +58,15 @@ run_test()
>>       echo "$remount_pid" >>$seqres.full



>>       echo "Wait for fsstress to exit and kill all background workers" 
>> >>$seqres.full
>> -    wait $fsstress_pid
>> -    kill $replace_pid $remount_pid
>> -    wait
>> +    kill $replace_pid
>> +    wait $fsstress_pid $replace_pid

The change first kills the replace and then wait for fsstress. Was this
intentional?

This patch is causing a regression. The replace never gets killed, as
the echo-comment specifically states to wait for fsstress and then kill
the replace. Following it fixes the issue.

Which the patch 7/10 reversed the order to fix. But why?

Thanks, Anand


>> -    # wait for the remount and replace operations to finish
>> +    # wait for the replace operationss to finish




>>       while ps aux | grep "replace start" | grep -qv grep; do
>>           sleep 1
>>       done
>> -    while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
>> -        sleep 1
>> -    done
>> +
>> +    _btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>>       echo "Scrub the filesystem" >>$seqres.full
>>       $BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
Filipe Manana March 28, 2024, 12:03 p.m. UTC | #3
On Thu, Mar 28, 2024 at 11:46 AM Anand Jain <anand.jain@oracle.com> wrote:
>
>
> >> diff --git a/tests/btrfs/071 b/tests/btrfs/071
> >> index 6ebbd8cc..7ba15390 100755
> >> --- a/tests/btrfs/071
> >> +++ b/tests/btrfs/071
> >> @@ -58,17 +58,15 @@ run_test()
> >>       echo "$remount_pid" >>$seqres.full
>
>
>
> >>       echo "Wait for fsstress to exit and kill all background workers"
> >> >>$seqres.full
> >> -    wait $fsstress_pid
> >> -    kill $replace_pid $remount_pid
> >> -    wait
> >> +    kill $replace_pid
> >> +    wait $fsstress_pid $replace_pid
>
> The change first kills the replace and then wait for fsstress. Was this
> intentional?

It wasn't. But after all patches applied, everything's correct, we
wait for fsstress to finish and then kill replace.
Do you want me to fix that?

The sequence will have to be:

wait $fsstress_pid
kill $replace_pid
wait $replace_pid

Thanks.

>
> This patch is causing a regression. The replace never gets killed, as
> the echo-comment specifically states to wait for fsstress and then kill
> the replace. Following it fixes the issue.
>
> Which the patch 7/10 reversed the order to fix. But why?
>
> Thanks, Anand
>
>
> >> -    # wait for the remount and replace operations to finish
> >> +    # wait for the replace operationss to finish
>
>
>
>
> >>       while ps aux | grep "replace start" | grep -qv grep; do
> >>           sleep 1
> >>       done
> >> -    while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
> >> -        sleep 1
> >> -    done
> >> +
> >> +    _btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
> >>       echo "Scrub the filesystem" >>$seqres.full
> >>       $BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
Anand Jain March 28, 2024, 12:50 p.m. UTC | #4
On 3/28/24 20:03, Filipe Manana wrote:
> On Thu, Mar 28, 2024 at 11:46 AM Anand Jain <anand.jain@oracle.com> wrote:
>>
>>
>>>> diff --git a/tests/btrfs/071 b/tests/btrfs/071
>>>> index 6ebbd8cc..7ba15390 100755
>>>> --- a/tests/btrfs/071
>>>> +++ b/tests/btrfs/071
>>>> @@ -58,17 +58,15 @@ run_test()
>>>>        echo "$remount_pid" >>$seqres.full
>>
>>
>>
>>>>        echo "Wait for fsstress to exit and kill all background workers"
>>>>>> $seqres.full
>>>> -    wait $fsstress_pid
>>>> -    kill $replace_pid $remount_pid
>>>> -    wait
>>>> +    kill $replace_pid
>>>> +    wait $fsstress_pid $replace_pid
>>

>> The change first kills the replace and then wait for fsstress. Was this
>> intentional?
> 

> It wasn't.

Ok. Thanks for confirming. I will fix it.

-Anand


> But after all patches applied, everything's correct, we
> wait for fsstress to finish and then kill replace.
> Do you want me to fix that?
> 
> The sequence will have to be:
> 
> wait $fsstress_pid
> kill $replace_pid
> wait $replace_pid
> 
> Thanks.
> 
>>
>> This patch is causing a regression. The replace never gets killed, as
>> the echo-comment specifically states to wait for fsstress and then kill
>> the replace. Following it fixes the issue.
>>
>> Which the patch 7/10 reversed the order to fix. But why?
>>
>> Thanks, Anand
>>
>>
>>>> -    # wait for the remount and replace operations to finish
>>>> +    # wait for the replace operationss to finish
>>
>>
>>
>>
>>>>        while ps aux | grep "replace start" | grep -qv grep; do
>>>>            sleep 1
>>>>        done
>>>> -    while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
>>>> -        sleep 1
>>>> -    done
>>>> +
>>>> +    _btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
>>>>        echo "Scrub the filesystem" >>$seqres.full
>>>>        $BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
diff mbox series

Patch

diff --git a/common/btrfs b/common/btrfs
index 46056d4a..66a3a347 100644
--- a/common/btrfs
+++ b/common/btrfs
@@ -411,6 +411,21 @@  _btrfs_stress_remount_compress()
 	done
 }
 
+# Kill a background process running _btrfs_stress_remount_compress()
+_btrfs_kill_stress_remount_compress_pid()
+{
+	local remount_pid=$1
+	local btrfs_mnt=$2
+
+	# Ignore if process already died.
+	kill $remount_pid &> /dev/null
+	wait $remount_pid &> /dev/null
+	# Wait for the remount loop to finish.
+	while ps aux | grep "mount.*${btrfs_mnt}" | grep -qv grep; do
+		sleep 1
+	done
+}
+
 # stress btrfs by replacing devices in a loop
 # Note that at least 3 devices are needed in SCRATCH_DEV_POOL and the last
 # device should be free(not used by btrfs)
diff --git a/tests/btrfs/063 b/tests/btrfs/063
index baf0c356..5ee2837f 100755
--- a/tests/btrfs/063
+++ b/tests/btrfs/063
@@ -52,12 +52,7 @@  run_test()
 	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
 	wait $fsstress_pid
 	_btrfs_kill_stress_balance_pid $balance_pid
-	kill $remount_pid
-	wait $remount_pid
-	# wait for the remount loop to finish
-	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
-		sleep 1
-	done
+	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
 
 	echo "Scrub the filesystem" >>$seqres.full
 	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
diff --git a/tests/btrfs/068 b/tests/btrfs/068
index 15fd41db..db53254a 100755
--- a/tests/btrfs/068
+++ b/tests/btrfs/068
@@ -58,12 +58,8 @@  run_test()
 	wait $fsstress_pid
 
 	touch $stop_file
-	kill $remount_pid
-	wait
-	# wait for the remount loop process to finish
-	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
-		sleep 1
-	done
+	wait $subvol_pid
+	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
 
 	echo "Scrub the filesystem" >>$seqres.full
 	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
diff --git a/tests/btrfs/071 b/tests/btrfs/071
index 6ebbd8cc..7ba15390 100755
--- a/tests/btrfs/071
+++ b/tests/btrfs/071
@@ -58,17 +58,15 @@  run_test()
 	echo "$remount_pid" >>$seqres.full
 
 	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
-	wait $fsstress_pid
-	kill $replace_pid $remount_pid
-	wait
+	kill $replace_pid
+	wait $fsstress_pid $replace_pid
 
-	# wait for the remount and replace operations to finish
+	# wait for the replace operationss to finish
 	while ps aux | grep "replace start" | grep -qv grep; do
 		sleep 1
 	done
-	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
-		sleep 1
-	done
+
+	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
 
 	echo "Scrub the filesystem" >>$seqres.full
 	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >>$seqres.full 2>&1
diff --git a/tests/btrfs/073 b/tests/btrfs/073
index 49a4abd1..50358286 100755
--- a/tests/btrfs/073
+++ b/tests/btrfs/073
@@ -51,13 +51,7 @@  run_test()
 
 	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
 	wait $fsstress_pid
-	kill $remount_pid
-	wait $remount_pid
-	# wait for the remount operation to finish
-	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
-		sleep 1
-	done
-
+	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
 	_btrfs_kill_stress_scrub_pid $scrub_pid
 
 	echo "Scrub the filesystem" >>$seqres.full
diff --git a/tests/btrfs/074 b/tests/btrfs/074
index d51922d0..6e93b36a 100755
--- a/tests/btrfs/074
+++ b/tests/btrfs/074
@@ -52,13 +52,7 @@  run_test()
 
 	echo "Wait for fsstress to exit and kill all background workers" >>$seqres.full
 	wait $fsstress_pid
-	kill $remount_pid
-	wait $remount_pid
-	# wait for the remount operation to finish
-	while ps aux | grep "mount.*$SCRATCH_MNT" | grep -qv grep; do
-		sleep 1
-	done
-
+	_btrfs_kill_stress_remount_compress_pid $remount_pid $SCRATCH_MNT
 	_btrfs_kill_stress_defrag_pid $defrag_pid
 
 	echo "Scrub the filesystem" >>$seqres.full