Message ID | 20230421060505.10132-8-dwagner@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nvme testsuite runtime optimization | expand |
On 4/21/23 08:05, Daniel Wagner wrote: > Introduce two new function to calculate the IO size for fio jobs. > > _nvme_calc_io_size() returns the jobs size for _run_fio_verify_io() > function. Reduce the max size of the job by one megabyte to make the > test more robust not to run out of space by accident. Note these fio > calls run with just one jobs. > > _nvme_calc_run_io_size() returns the jobs size for _run_fio_rand_io() > function. Again, the jobs size is not maxing out the space and most > important it takes the number of jobs into account which are > created (number of CPUs). > > Signed-off-by: Daniel Wagner <dwagner@suse.de> > --- > tests/nvme/010 | 5 +++-- > tests/nvme/011 | 5 +++-- > tests/nvme/032 | 6 ++++-- > tests/nvme/034 | 4 +++- > tests/nvme/040 | 4 +++- > tests/nvme/045 | 4 +++- > tests/nvme/047 | 6 ++++-- > tests/nvme/rc | 20 ++++++++++++++++++++ > 8 files changed, 43 insertions(+), 11 deletions(-) > > diff --git a/tests/nvme/010 b/tests/nvme/010 > index 805f80d40620..d209335c2158 100755 > --- a/tests/nvme/010 > +++ b/tests/nvme/010 > @@ -25,6 +25,7 @@ test() { > local loop_dev > local file_path="${TMPDIR}/img" > local subsys_name="blktests-subsystem-1" > + local io_size > > truncate -s "${nvme_img_size}" "${file_path}" > > @@ -41,8 +42,8 @@ test() { > cat "/sys/block/${nvmedev}n1/uuid" > cat "/sys/block/${nvmedev}n1/wwid" > > - _run_fio_verify_io --size=${nvme_img_size} \ > - --filename="/dev/${nvmedev}n1" > + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" > + _run_fio_verify_io --size="${io_size}" --filename="/dev/${nvmedev}n1" > > _nvme_disconnect_subsys "${subsys_name}" > > diff --git a/tests/nvme/011 b/tests/nvme/011 > index da8cbac11124..294ba4333aff 100755 > --- a/tests/nvme/011 > +++ b/tests/nvme/011 > @@ -25,6 +25,7 @@ test() { > local file_path > local file_path="${TMPDIR}/img" > local subsys_name="blktests-subsystem-1" > + local io_size > > truncate -s "${nvme_img_size}" "${file_path}" > > @@ -39,8 +40,8 @@ test() { > cat 
"/sys/block/${nvmedev}n1/uuid" > cat "/sys/block/${nvmedev}n1/wwid" > > - _run_fio_verify_io --size="${nvme_img_size}" \ > - --filename="/dev/${nvmedev}n1" > + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" > + _run_fio_verify_io --size="${io_size}" --filename="/dev/${nvmedev}n1" > > _nvme_disconnect_subsys "${subsys_name}" > > diff --git a/tests/nvme/032 b/tests/nvme/032 > index 9f9756b0f959..ad701cea877d 100755 > --- a/tests/nvme/032 > +++ b/tests/nvme/032 > @@ -33,13 +33,15 @@ test_device() { > local sysfs > local attr > local m > + local rand_io_size > > pdev="$(_get_pci_dev_from_blkdev)" > sysfs="/sys/bus/pci/devices/${pdev}" > > # start fio job > - _run_fio_rand_io --filename="$TEST_DEV" --size="${nvme_img_size}" \ > - --group_reporting --time_based --runtime=1m &> /dev/null & > + rand_io_size="$(_nvme_calc_rand_io_size "${nvme_img_size}")" > + _run_fio_rand_io --filename="$TEST_DEV" --size="${rand_io_size}" \ > + --group_reporting --time_based --runtime=1m > /dev/null & > > sleep 5 > > diff --git a/tests/nvme/034 b/tests/nvme/034 > index e0ede717c373..0df8bef98e5e 100755 > --- a/tests/nvme/034 > +++ b/tests/nvme/034 > @@ -19,6 +19,7 @@ test_device() { > local ctrldev > local nsdev > local port > + local io_size > > echo "Running ${TEST_NAME}" > > @@ -26,7 +27,8 @@ test_device() { > port=$(_nvmet_passthru_target_setup "${subsys}") > nsdev=$(_nvmet_passthru_target_connect "${nvme_trtype}" "${subsys}") > > - _run_fio_verify_io --size="${nvme_img_size}" --filename="${nsdev}" > + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" > + _run_fio_verify_io --size="${io_size}" --filename="${nsdev}" > > _nvme_disconnect_subsys "${subsys}" > _nvmet_passthru_target_cleanup "${port}" "${subsys}" > diff --git a/tests/nvme/040 b/tests/nvme/040 > index 31b7cafef4be..b033a2a866f2 100755 > --- a/tests/nvme/040 > +++ b/tests/nvme/040 > @@ -21,6 +21,7 @@ test() { > local port > local loop_dev > local nvmedev > + local rand_io_size > > echo "Running ${TEST_NAME}" > > @@ -37,7 
+38,8 @@ test() { > > # start fio job > echo "starting background fio" > - _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${nvme_img_size}" \ > + rand_io_size="$(_nvme_calc_rand_io_size "${nvme_img_size}")" > + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" \ > --group_reporting --ramp_time=5 \ > --time_based --runtime=1m &> /dev/null & > sleep 5 > diff --git a/tests/nvme/045 b/tests/nvme/045 > index 99012f6bed8f..f50087cccb6a 100755 > --- a/tests/nvme/045 > +++ b/tests/nvme/045 > @@ -31,6 +31,7 @@ test() { > local ctrlkey > local new_ctrlkey > local ctrldev > + local rand_io_size > > echo "Running ${TEST_NAME}" > > @@ -120,7 +121,8 @@ test() { > > nvmedev=$(_find_nvme_dev "${subsys_name}") > > - _run_fio_rand_io --size=4m --filename="/dev/${nvmedev}n1" > + rand_io_size="$(_nvme_calc_rand_io_size 4m)" > + _run_fio_rand_io --size="${rand_io_size}" --filename="/dev/${nvmedev}n1" > > _nvme_disconnect_subsys "${subsys_name}" > > diff --git a/tests/nvme/047 b/tests/nvme/047 > index b5a8d469a983..6a7599bc2e91 100755 > --- a/tests/nvme/047 > +++ b/tests/nvme/047 > @@ -25,6 +25,7 @@ test() { > local port > local nvmedev > local loop_dev > + local rand_io_size > local file_path="$TMPDIR/img" > local subsys_name="blktests-subsystem-1" > > @@ -42,7 +43,8 @@ test() { > > nvmedev=$(_find_nvme_dev "${subsys_name}") > > - _xfs_run_fio_verify_io /dev/"${nvmedev}n1" "1m" || echo FAIL > + rand_io_size="$(_nvme_calc_rand_io_size 4M)" > + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" > > _nvme_disconnect_subsys "${subsys_name}" >> "$FULL" 2>&1 > > @@ -50,7 +52,7 @@ test() { > --nr-write-queues 1 \ > --nr-poll-queues 1 || echo FAIL > > - _xfs_run_fio_verify_io /dev/"${nvmedev}n1" "1m" || echo FAIL > + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" > > _nvme_disconnect_subsys "${subsys_name}" >> "$FULL" 2>&1 > > diff --git a/tests/nvme/rc b/tests/nvme/rc > index b1f2dacae125..172f510527ed 100644 > 
--- a/tests/nvme/rc > +++ b/tests/nvme/rc > @@ -150,6 +150,26 @@ _test_dev_nvme_nsid() { > cat "${TEST_DEV_SYSFS}/nsid" > } > > +_nvme_calc_io_size() { > + local img_size_mb > + local io_size_mb > + > + img_size_mb="$(convert_to_mb "$1")" > + io_size_mb="$((img_size_mb - 1))" > + > + echo "${io_size_mb}m" > +} > + > +_nvme_calc_rand_io_size() { > + local img_size_mb > + local io_size_mb > + > + img_size_mb="$(convert_to_mb "$1")" > + io_size_mb="$(printf "%d" $((((img_size_mb * 1024 * 1024) / $(nproc) - 1) / 1024)))" > + ... ending with ridiculous small io sizes on machines with lots of CPUs. Please cap nproc by something sane like 32. > + echo "${io_size_mb}k" > +} > + > _nvme_fcloop_add_rport() { > local local_wwnn="$1" > local local_wwpn="$2" Cheers, Hannes
On Fri, Apr 21, 2023 at 08:33:46AM +0200, Hannes Reinecke wrote: > +_nvme_calc_rand_io_size() { > > + local img_size_mb > > + local io_size_mb > > + > > + img_size_mb="$(convert_to_mb "$1")" > > + io_size_mb="$(printf "%d" $((((img_size_mb * 1024 * 1024) / $(nproc) - 1) / 1024)))" > > + > > ... ending with ridiculous small io sizes on machines with lots of CPUs. > Please cap nproc by something sane like 32. Yeah, probably not really a good long-term strategy. I was wondering if we should make the run_fio() variants smarter and do the size calculation there and not in the caller. If we do this, we could make the number of jobs dependent on CPUs and image size a bit nicer.
On Apr 21, 2023 / 09:03, Daniel Wagner wrote: > On Fri, Apr 21, 2023 at 08:33:46AM +0200, Hannes Reinecke wrote: > > +_nvme_calc_rand_io_size() { > > > + local img_size_mb > > > + local io_size_mb > > > + > > > + img_size_mb="$(convert_to_mb "$1")" > > > + io_size_mb="$(printf "%d" $((((img_size_mb * 1024 * 1024) / $(nproc) - 1) / 1024)))" > > > + > > > > ... ending with ridiculous small io sizes on machines with lots of CPUs. > > Please cap nproc by something sane like 32. > > Yeah, propably not really good long time strategy. I was wondering if we should > make run_fio() variants smarter and do the size callculation there and not by > the callee. If we do this, we could make the number of jobs dependend on CPUs > and image size a bit nicer. The usage of _run_fio_rand_io() look different for each test case. nvme/032 kills the fio process when it is no longer required. Then IO size reduction with _nvme_calc_io_size() will not reduce runtime of nvme/032. I think nvme/040 has same story, since _nvme_delete_ctrl will stop the fio process with I/O error. On the other hand, nvme/045 and nvme/047 may have different usage. I'm not sure if these test case needs I/O with all CPUs. It would be better to have other run_fio() variant as Daniel mentioned, so that their runtime will not depend on number of CPUs.
On Apr 21, 2023 / 08:05, Daniel Wagner wrote: > Introduce two new function to calculate the IO size for fio jobs. > > _nvme_calc_io_size() returns the jobs size for _run_fio_verify_io() > function. Reduce the max size of the job by one megabyte to make the > test more robust not to run out of space by accident. Note these fio > calls run with just one jobs. It is not clear for me what kind of issue happens without the 1MB decrement. Could you share failure symptoms you observed? > > _nvme_calc_run_io_size() returns the jobs size for _run_fio_rand_io() > function. Again, the jobs size is not maxing out the space and most > important it takes the number of jobs into account which are > created (number of CPUs). This patch has two purposes, similar but different. It would be the better to separate them.
On Fri, Apr 28, 2023 at 04:00:54AM +0000, Shinichiro Kawasaki wrote: > On Apr 21, 2023 / 08:05, Daniel Wagner wrote: > > Introduce two new function to calculate the IO size for fio jobs. > > > > _nvme_calc_io_size() returns the jobs size for _run_fio_verify_io() > > function. Reduce the max size of the job by one megabyte to make the > > test more robust not to run out of space by accident. Note these fio > > calls run with just one jobs. > > It is not clear for me what kind of issue happens without the 1MB decrement. > Could you share failure symptoms you observed? As I said, this is just to make the test more robust, as the size limits are not the main objective of these tests. I don't care about this too much, I'll just drop it then.
On Tue, May 02, 2023 at 05:45:46PM +0200, Daniel Wagner wrote: > On Fri, Apr 28, 2023 at 04:00:54AM +0000, Shinichiro Kawasaki wrote: > > On Apr 21, 2023 / 08:05, Daniel Wagner wrote: > > > Introduce two new function to calculate the IO size for fio jobs. > > > > > > _nvme_calc_io_size() returns the jobs size for _run_fio_verify_io() > > > function. Reduce the max size of the job by one megabyte to make the > > > test more robust not to run out of space by accident. Note these fio > > > calls run with just one jobs. > > > > It is not clear for me what kind of issue happens without the 1MB decrement. > > Could you share failure symptoms you observed? > > As I said, this is just to make the test more robust as this the size limits > are not the main objective of these tests. I don't care about this too > much, I'll just drop it then. BTW, this is how it would look like if the disk is too small: nvme/035 => nvme0n1 (run mkfs and data verification fio job on an NVMeOF passthru controller) [failed] runtime 2.383s ... 51.954s --- tests/nvme/035.out 2023-04-18 17:43:18.163745956 +0200 +++ /home/wagi/work/blktests/results/nvme0n1/nvme/035.out.bad 2023-05-02 18:21:09.442382196 +0200 @@ -1,3 +1,20 @@ Running nvme/035 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=925274112, buflen=4096 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=406040576, buflen=4096 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=498868224, buflen=4096 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=217063424, buflen=4096 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=1049411584, buflen=4096 +fio: io_u error on file /mnt/blktests//verify.0.0: No space left on device: write offset=348282880, buflen=4096 ... 
(Run 'diff -u tests/nvme/035.out /home/wagi/work/blktests/results/nvme0n1/nvme/035.out.bad' to see the entire diff)
diff --git a/tests/nvme/010 b/tests/nvme/010 index 805f80d40620..d209335c2158 100755 --- a/tests/nvme/010 +++ b/tests/nvme/010 @@ -25,6 +25,7 @@ test() { local loop_dev local file_path="${TMPDIR}/img" local subsys_name="blktests-subsystem-1" + local io_size truncate -s "${nvme_img_size}" "${file_path}" @@ -41,8 +42,8 @@ test() { cat "/sys/block/${nvmedev}n1/uuid" cat "/sys/block/${nvmedev}n1/wwid" - _run_fio_verify_io --size=${nvme_img_size} \ - --filename="/dev/${nvmedev}n1" + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" + _run_fio_verify_io --size="${io_size}" --filename="/dev/${nvmedev}n1" _nvme_disconnect_subsys "${subsys_name}" diff --git a/tests/nvme/011 b/tests/nvme/011 index da8cbac11124..294ba4333aff 100755 --- a/tests/nvme/011 +++ b/tests/nvme/011 @@ -25,6 +25,7 @@ test() { local file_path local file_path="${TMPDIR}/img" local subsys_name="blktests-subsystem-1" + local io_size truncate -s "${nvme_img_size}" "${file_path}" @@ -39,8 +40,8 @@ test() { cat "/sys/block/${nvmedev}n1/uuid" cat "/sys/block/${nvmedev}n1/wwid" - _run_fio_verify_io --size="${nvme_img_size}" \ - --filename="/dev/${nvmedev}n1" + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" + _run_fio_verify_io --size="${io_size}" --filename="/dev/${nvmedev}n1" _nvme_disconnect_subsys "${subsys_name}" diff --git a/tests/nvme/032 b/tests/nvme/032 index 9f9756b0f959..ad701cea877d 100755 --- a/tests/nvme/032 +++ b/tests/nvme/032 @@ -33,13 +33,15 @@ test_device() { local sysfs local attr local m + local rand_io_size pdev="$(_get_pci_dev_from_blkdev)" sysfs="/sys/bus/pci/devices/${pdev}" # start fio job - _run_fio_rand_io --filename="$TEST_DEV" --size="${nvme_img_size}" \ - --group_reporting --time_based --runtime=1m &> /dev/null & + rand_io_size="$(_nvme_calc_rand_io_size "${nvme_img_size}")" + _run_fio_rand_io --filename="$TEST_DEV" --size="${rand_io_size}" \ + --group_reporting --time_based --runtime=1m > /dev/null & sleep 5 diff --git a/tests/nvme/034 b/tests/nvme/034 index 
e0ede717c373..0df8bef98e5e 100755 --- a/tests/nvme/034 +++ b/tests/nvme/034 @@ -19,6 +19,7 @@ test_device() { local ctrldev local nsdev local port + local io_size echo "Running ${TEST_NAME}" @@ -26,7 +27,8 @@ test_device() { port=$(_nvmet_passthru_target_setup "${subsys}") nsdev=$(_nvmet_passthru_target_connect "${nvme_trtype}" "${subsys}") - _run_fio_verify_io --size="${nvme_img_size}" --filename="${nsdev}" + io_size="$(_nvme_calc_io_size "${nvme_img_size}")" + _run_fio_verify_io --size="${io_size}" --filename="${nsdev}" _nvme_disconnect_subsys "${subsys}" _nvmet_passthru_target_cleanup "${port}" "${subsys}" diff --git a/tests/nvme/040 b/tests/nvme/040 index 31b7cafef4be..b033a2a866f2 100755 --- a/tests/nvme/040 +++ b/tests/nvme/040 @@ -21,6 +21,7 @@ test() { local port local loop_dev local nvmedev + local rand_io_size echo "Running ${TEST_NAME}" @@ -37,7 +38,8 @@ test() { # start fio job echo "starting background fio" - _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${nvme_img_size}" \ + rand_io_size="$(_nvme_calc_rand_io_size "${nvme_img_size}")" + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" \ --group_reporting --ramp_time=5 \ --time_based --runtime=1m &> /dev/null & sleep 5 diff --git a/tests/nvme/045 b/tests/nvme/045 index 99012f6bed8f..f50087cccb6a 100755 --- a/tests/nvme/045 +++ b/tests/nvme/045 @@ -31,6 +31,7 @@ test() { local ctrlkey local new_ctrlkey local ctrldev + local rand_io_size echo "Running ${TEST_NAME}" @@ -120,7 +121,8 @@ test() { nvmedev=$(_find_nvme_dev "${subsys_name}") - _run_fio_rand_io --size=4m --filename="/dev/${nvmedev}n1" + rand_io_size="$(_nvme_calc_rand_io_size 4m)" + _run_fio_rand_io --size="${rand_io_size}" --filename="/dev/${nvmedev}n1" _nvme_disconnect_subsys "${subsys_name}" diff --git a/tests/nvme/047 b/tests/nvme/047 index b5a8d469a983..6a7599bc2e91 100755 --- a/tests/nvme/047 +++ b/tests/nvme/047 @@ -25,6 +25,7 @@ test() { local port local nvmedev local loop_dev + local rand_io_size local 
file_path="$TMPDIR/img" local subsys_name="blktests-subsystem-1" @@ -42,7 +43,8 @@ test() { nvmedev=$(_find_nvme_dev "${subsys_name}") - _xfs_run_fio_verify_io /dev/"${nvmedev}n1" "1m" || echo FAIL + rand_io_size="$(_nvme_calc_rand_io_size 4M)" + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" _nvme_disconnect_subsys "${subsys_name}" >> "$FULL" 2>&1 @@ -50,7 +52,7 @@ test() { --nr-write-queues 1 \ --nr-poll-queues 1 || echo FAIL - _xfs_run_fio_verify_io /dev/"${nvmedev}n1" "1m" || echo FAIL + _run_fio_rand_io --filename="/dev/${nvmedev}n1" --size="${rand_io_size}" _nvme_disconnect_subsys "${subsys_name}" >> "$FULL" 2>&1 diff --git a/tests/nvme/rc b/tests/nvme/rc index b1f2dacae125..172f510527ed 100644 --- a/tests/nvme/rc +++ b/tests/nvme/rc @@ -150,6 +150,26 @@ _test_dev_nvme_nsid() { cat "${TEST_DEV_SYSFS}/nsid" } +_nvme_calc_io_size() { + local img_size_mb + local io_size_mb + + img_size_mb="$(convert_to_mb "$1")" + io_size_mb="$((img_size_mb - 1))" + + echo "${io_size_mb}m" +} + +_nvme_calc_rand_io_size() { + local img_size_mb + local io_size_mb + + img_size_mb="$(convert_to_mb "$1")" + io_size_mb="$(printf "%d" $((((img_size_mb * 1024 * 1024) / $(nproc) - 1) / 1024)))" + + echo "${io_size_mb}k" +} + _nvme_fcloop_add_rport() { local local_wwnn="$1" local local_wwpn="$2"
Introduce two new functions to calculate the IO size for fio jobs. _nvme_calc_io_size() returns the job size for the _run_fio_verify_io() function. Reduce the max size of the job by one megabyte to make the test more robust not to run out of space by accident. Note these fio calls run with just one job. _nvme_calc_rand_io_size() returns the job size for the _run_fio_rand_io() function. Again, the job size is not maxing out the space and, most importantly, it takes the number of jobs into account which are created (number of CPUs). Signed-off-by: Daniel Wagner <dwagner@suse.de> --- tests/nvme/010 | 5 +++-- tests/nvme/011 | 5 +++-- tests/nvme/032 | 6 ++++-- tests/nvme/034 | 4 +++- tests/nvme/040 | 4 +++- tests/nvme/045 | 4 +++- tests/nvme/047 | 6 ++++-- tests/nvme/rc | 20 ++++++++++++++++++++ 8 files changed, 43 insertions(+), 11 deletions(-)