Message ID | 20241127045403.3665299-26-david@fromorbit.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fstests: concurrent test execution | expand |
On Wed, Nov 27, 2024 at 03:51:55PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > Several tests use lots of processes to stress the filesystem. Many > of them haven't really considered what this means for running the > test on high CPU count machines (e.g. >32p) and the potential contention > and performance issues this might trigger. > > Some of these tests simply need to increase the size of the journal. > Some need to run on filesystems with high inherent concurrency (e.g. > larger AG count). Some need more efficient/faster file creation. And > so on. > > This commit is a collection of those sorts of changes to improve > runtimes on high CPU count machines. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > src/aio-dio-regress/aio-last-ref-held-by-io.c | 5 ++++- > tests/generic/251 | 5 ++++- > tests/generic/323 | 7 +++++-- > tests/generic/530 | 2 +- > tests/generic/531 | 8 +++++++- > tests/xfs/013 | 4 ++-- > tests/xfs/076 | 6 +++--- > tests/xfs/176 | 6 +++--- > tests/xfs/297 | 4 +++- > tests/xfs/501 | 2 +- > tests/xfs/502 | 2 +- > 11 files changed, 34 insertions(+), 17 deletions(-) > > diff --git a/src/aio-dio-regress/aio-last-ref-held-by-io.c b/src/aio-dio-regress/aio-last-ref-held-by-io.c > index a70f2a9b7..7106e30a9 100644 > --- a/src/aio-dio-regress/aio-last-ref-held-by-io.c > +++ b/src/aio-dio-regress/aio-last-ref-held-by-io.c > @@ -85,11 +85,14 @@ aio_test_thread(void *data) > /* > * Problems have been easier to trigger when spreading the > * workload over the available CPUs. > + * > + * If CPU hotplug is active, this can randomly fail so dump the error > + * to stderror so it can be filtered out easily by the caller. 
> */ > CPU_ZERO(&cpuset); > CPU_SET(mycpu, &cpuset); > if (sched_setaffinity(mytid, sizeof(cpuset), &cpuset)) { > - printf("FAILED to set thread %d to run on cpu %ld\n", > + fprintf(stderr, "FAILED to set thread %d to run on cpu %ld\n", > mytid, mycpu); > } > > diff --git a/tests/generic/251 b/tests/generic/251 > index b432fb119..98986469e 100755 > --- a/tests/generic/251 > +++ b/tests/generic/251 > @@ -175,9 +175,12 @@ nproc=20 > # Copy $here to the scratch fs and make coipes of the replica. The fstests > # output (and hence $seqres.full) could be in $here, so we need to snapshot > # $here before computing file checksums. > +# > +# $here/* as the files to copy so we avoid any .git directory that might be > +# much, much larger than the rest of the fstests source tree we are copying. > content=$SCRATCH_MNT/orig > mkdir -p $content > -cp -axT $here/ $content/ > +cp -ax $here/* $content/ Hi Dave, Darrick has sent a patch for review that changes this same line: https://lore.kernel.org/fstests/173258395238.4031902.16373799205312238046.stgit@frogsfrogsfrogs/T/#u Please help to review it. If you no longer need this change, I'll resolve the conflict (by dropping your change to g/251 above) when I merge this patch. Thanks, Zorro > > mkdir -p $tmp > > diff --git a/tests/generic/323 b/tests/generic/323 > index 457253fee..2dde04d06 100755 > --- a/tests/generic/323 > +++ b/tests/generic/323 > @@ -23,12 +23,15 @@ _require_aiodio aio-last-ref-held-by-io > testfile=$TEST_DIR/aio-testfile > $XFS_IO_PROG -ftc "pwrite 0 10m" $testfile | _filter_xfs_io > > -$AIO_TEST 0 100 $testfile > +# This can emit cpu affinity setting failures that aren't considered test > +# failures but cause golden image failures. Redirect the test output to > +# $seqres.full so that it is captured but doesn't directly cause test failures. > +$AIO_TEST 0 100 $testfile 2>> $seqres.full > if [ $? 
-ne 0 ]; then > exit $status > fi > diff --git a/tests/generic/530 b/tests/generic/530 > index 2e47c3e0c..18256b870 100755 > --- a/tests/generic/530 > +++ b/tests/generic/530 > @@ -22,7 +22,7 @@ _require_scratch_shutdown > _require_metadata_journaling > _require_test_program "t_open_tmpfiles" > > -_scratch_mkfs >> $seqres.full 2>&1 > +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1 > _scratch_mount > > # Set ULIMIT_NOFILE to min(file-max / 2, 50000 files per LOAD_FACTOR) > diff --git a/tests/generic/531 b/tests/generic/531 > index 0e3564fd4..ed6c3f911 100755 > --- a/tests/generic/531 > +++ b/tests/generic/531 > @@ -21,7 +21,13 @@ _require_scratch > _require_xfs_io_command "-T" > _require_test_program "t_open_tmpfiles" > > -_scratch_mkfs >> $seqres.full 2>&1 > +# On high CPU count machines, this runs a -lot- of create and unlink > +# concurrency. Set the filesytsem up to handle this. > +if [ $FSTYP = "xfs" ]; then > + _scratch_mkfs "-d agcount=32" >> $seqres.full 2>&1 > +else > + _scratch_mkfs >> $seqres.full 2>&1 > +fi > _scratch_mount > > # Try to load up all the CPUs, two threads per CPU. 
> diff --git a/tests/xfs/013 b/tests/xfs/013 > index fd3d8c64c..5a92ef084 100755 > --- a/tests/xfs/013 > +++ b/tests/xfs/013 > @@ -28,7 +28,7 @@ _create() > mkdir -p $dir > for i in $(seq 0 $count) > do > - touch $dir/$i 2>&1 | filter_enospc > + echo -n > $dir/$i 2>&1 | filter_enospc > done > } > > @@ -42,7 +42,7 @@ _rand_replace() > do > file=$((RANDOM % count)) > rm -f $dir/$file > - touch $dir/$file 2>&1 | filter_enospc > + echo -n > $dir/$file 2>&1 | filter_enospc > done > } > > diff --git a/tests/xfs/076 b/tests/xfs/076 > index 840617ccb..e315a067c 100755 > --- a/tests/xfs/076 > +++ b/tests/xfs/076 > @@ -47,10 +47,10 @@ _alloc_inodes() > dir=$1 > > i=0 > - while [ true ]; do > - touch $dir/$i 2>> $seqres.full || break > + ( while [ true ]; do > + echo -n > $dir/$i || break > i=$((i + 1)) > - done > + done ) >> $seqres.full 2>&1 > } > > > diff --git a/tests/xfs/176 b/tests/xfs/176 > index 8e5951ec1..1aa8cde38 100755 > --- a/tests/xfs/176 > +++ b/tests/xfs/176 > @@ -68,10 +68,10 @@ _alloc_inodes() > dir=$1 > > i=0 > - while [ true ]; do > - echo -n > $dir/$i >> $seqres.full 2>&1 || break > + ( while [ true ]; do > + echo -n > $dir/$i || break > i=$((i + 1)) > - done > + done ) >> $seqres.full 2>&1 > } > > # Find a sparse inode cluster after logend_agno/logend_agino. 
> diff --git a/tests/xfs/297 b/tests/xfs/297 > index f9cd2ff12..af6af601a 100755 > --- a/tests/xfs/297 > +++ b/tests/xfs/297 > @@ -34,7 +34,9 @@ _scratch_mount > STRESS_DIR="$SCRATCH_MNT/testdir" > mkdir -p $STRESS_DIR > > -_run_fsstress_bg -d $STRESS_DIR -n 1000 -p 1000 $FSSTRESS_AVOID > +# turn off sync as this can lead to near deadlock conditions due to every > +# fsstress process lockstepping against freeze on large CPU count machines > +_run_fsstress_bg -d $STRESS_DIR -f sync=0 -n 1000 -p 1000 $FSSTRESS_AVOID > > # Freeze/unfreeze file system randomly > echo "Start freeze/unfreeze randomly" | tee -a $seqres.full > diff --git a/tests/xfs/501 b/tests/xfs/501 > index 1da4cbf92..678c51b52 100755 > --- a/tests/xfs/501 > +++ b/tests/xfs/501 > @@ -33,7 +33,7 @@ _require_xfs_sysfs debug/log_recovery_delay > _require_scratch > _require_test_program "t_open_tmpfiles" > > -_scratch_mkfs >> $seqres.full 2>&1 > +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1 > _scratch_mount > > # Set ULIMIT_NOFILE to min(file-max / 2, 30000 files per LOAD_FACTOR) > diff --git a/tests/xfs/502 b/tests/xfs/502 > index 52b8e95a2..10b0017f6 100755 > --- a/tests/xfs/502 > +++ b/tests/xfs/502 > @@ -23,7 +23,7 @@ _require_xfs_io_error_injection "iunlink_fallback" > _require_scratch > _require_test_program "t_open_tmpfiles" > > -_scratch_mkfs | _filter_mkfs 2> $tmp.mkfs > /dev/null > +_scratch_mkfs "-l size=256m" | _filter_mkfs 2> $tmp.mkfs > /dev/null > cat $tmp.mkfs >> $seqres.full > . $tmp.mkfs > > -- > 2.45.2 > >
diff --git a/src/aio-dio-regress/aio-last-ref-held-by-io.c b/src/aio-dio-regress/aio-last-ref-held-by-io.c index a70f2a9b7..7106e30a9 100644 --- a/src/aio-dio-regress/aio-last-ref-held-by-io.c +++ b/src/aio-dio-regress/aio-last-ref-held-by-io.c @@ -85,11 +85,14 @@ aio_test_thread(void *data) /* * Problems have been easier to trigger when spreading the * workload over the available CPUs. + * + * If CPU hotplug is active, this can randomly fail so dump the error + * to stderror so it can be filtered out easily by the caller. */ CPU_ZERO(&cpuset); CPU_SET(mycpu, &cpuset); if (sched_setaffinity(mytid, sizeof(cpuset), &cpuset)) { - printf("FAILED to set thread %d to run on cpu %ld\n", + fprintf(stderr, "FAILED to set thread %d to run on cpu %ld\n", mytid, mycpu); } diff --git a/tests/generic/251 b/tests/generic/251 index b432fb119..98986469e 100755 --- a/tests/generic/251 +++ b/tests/generic/251 @@ -175,9 +175,12 @@ nproc=20 # Copy $here to the scratch fs and make coipes of the replica. The fstests # output (and hence $seqres.full) could be in $here, so we need to snapshot # $here before computing file checksums. +# +# $here/* as the files to copy so we avoid any .git directory that might be +# much, much larger than the rest of the fstests source tree we are copying. content=$SCRATCH_MNT/orig mkdir -p $content -cp -axT $here/ $content/ +cp -ax $here/* $content/ mkdir -p $tmp diff --git a/tests/generic/323 b/tests/generic/323 index 457253fee..2dde04d06 100755 --- a/tests/generic/323 +++ b/tests/generic/323 @@ -23,12 +23,15 @@ _require_aiodio aio-last-ref-held-by-io testfile=$TEST_DIR/aio-testfile $XFS_IO_PROG -ftc "pwrite 0 10m" $testfile | _filter_xfs_io -$AIO_TEST 0 100 $testfile +# This can emit cpu affinity setting failures that aren't considered test +# failures but cause golden image failures. Redirect the test output to +# $seqres.full so that it is captured but doesn't directly cause test failures. +$AIO_TEST 0 100 $testfile 2>> $seqres.full if [ $? 
-ne 0 ]; then exit $status fi -$AIO_TEST 1 100 $testfile +$AIO_TEST 1 100 $testfile 2>> $seqres.full if [ $? -ne 0 ]; then exit $status fi diff --git a/tests/generic/530 b/tests/generic/530 index 2e47c3e0c..18256b870 100755 --- a/tests/generic/530 +++ b/tests/generic/530 @@ -22,7 +22,7 @@ _require_scratch_shutdown _require_metadata_journaling _require_test_program "t_open_tmpfiles" -_scratch_mkfs >> $seqres.full 2>&1 +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1 _scratch_mount # Set ULIMIT_NOFILE to min(file-max / 2, 50000 files per LOAD_FACTOR) diff --git a/tests/generic/531 b/tests/generic/531 index 0e3564fd4..ed6c3f911 100755 --- a/tests/generic/531 +++ b/tests/generic/531 @@ -21,7 +21,13 @@ _require_scratch _require_xfs_io_command "-T" _require_test_program "t_open_tmpfiles" -_scratch_mkfs >> $seqres.full 2>&1 +# On high CPU count machines, this runs a -lot- of create and unlink +# concurrency. Set the filesytsem up to handle this. +if [ $FSTYP = "xfs" ]; then + _scratch_mkfs "-d agcount=32" >> $seqres.full 2>&1 +else + _scratch_mkfs >> $seqres.full 2>&1 +fi _scratch_mount # Try to load up all the CPUs, two threads per CPU. 
diff --git a/tests/xfs/013 b/tests/xfs/013 index fd3d8c64c..5a92ef084 100755 --- a/tests/xfs/013 +++ b/tests/xfs/013 @@ -28,7 +28,7 @@ _create() mkdir -p $dir for i in $(seq 0 $count) do - touch $dir/$i 2>&1 | filter_enospc + echo -n > $dir/$i 2>&1 | filter_enospc done } @@ -42,7 +42,7 @@ _rand_replace() do file=$((RANDOM % count)) rm -f $dir/$file - touch $dir/$file 2>&1 | filter_enospc + echo -n > $dir/$file 2>&1 | filter_enospc done } diff --git a/tests/xfs/076 b/tests/xfs/076 index 840617ccb..e315a067c 100755 --- a/tests/xfs/076 +++ b/tests/xfs/076 @@ -47,10 +47,10 @@ _alloc_inodes() dir=$1 i=0 - while [ true ]; do - touch $dir/$i 2>> $seqres.full || break + ( while [ true ]; do + echo -n > $dir/$i || break i=$((i + 1)) - done + done ) >> $seqres.full 2>&1 } diff --git a/tests/xfs/176 b/tests/xfs/176 index 8e5951ec1..1aa8cde38 100755 --- a/tests/xfs/176 +++ b/tests/xfs/176 @@ -68,10 +68,10 @@ _alloc_inodes() dir=$1 i=0 - while [ true ]; do - echo -n > $dir/$i >> $seqres.full 2>&1 || break + ( while [ true ]; do + echo -n > $dir/$i || break i=$((i + 1)) - done + done ) >> $seqres.full 2>&1 } # Find a sparse inode cluster after logend_agno/logend_agino. 
diff --git a/tests/xfs/297 b/tests/xfs/297 index f9cd2ff12..af6af601a 100755 --- a/tests/xfs/297 +++ b/tests/xfs/297 @@ -34,7 +34,9 @@ _scratch_mount STRESS_DIR="$SCRATCH_MNT/testdir" mkdir -p $STRESS_DIR -_run_fsstress_bg -d $STRESS_DIR -n 1000 -p 1000 $FSSTRESS_AVOID +# turn off sync as this can lead to near deadlock conditions due to every +# fsstress process lockstepping against freeze on large CPU count machines +_run_fsstress_bg -d $STRESS_DIR -f sync=0 -n 1000 -p 1000 $FSSTRESS_AVOID # Freeze/unfreeze file system randomly echo "Start freeze/unfreeze randomly" | tee -a $seqres.full diff --git a/tests/xfs/501 b/tests/xfs/501 index 1da4cbf92..678c51b52 100755 --- a/tests/xfs/501 +++ b/tests/xfs/501 @@ -33,7 +33,7 @@ _require_xfs_sysfs debug/log_recovery_delay _require_scratch _require_test_program "t_open_tmpfiles" -_scratch_mkfs >> $seqres.full 2>&1 +_scratch_mkfs "-l size=256m" >> $seqres.full 2>&1 _scratch_mount # Set ULIMIT_NOFILE to min(file-max / 2, 30000 files per LOAD_FACTOR) diff --git a/tests/xfs/502 b/tests/xfs/502 index 52b8e95a2..10b0017f6 100755 --- a/tests/xfs/502 +++ b/tests/xfs/502 @@ -23,7 +23,7 @@ _require_xfs_io_error_injection "iunlink_fallback" _require_scratch _require_test_program "t_open_tmpfiles" -_scratch_mkfs | _filter_mkfs 2> $tmp.mkfs > /dev/null +_scratch_mkfs "-l size=256m" | _filter_mkfs 2> $tmp.mkfs > /dev/null cat $tmp.mkfs >> $seqres.full . $tmp.mkfs