[v2] xfstests: add test for xfs_repair progress reporting
diff mbox series

Message ID 20200520035258.298516-1-ddouwsma@redhat.com
State New
Headers show
Series
  • [v2] xfstests: add test for xfs_repair progress reporting
Related show

Commit Message

Donald Douwsma May 20, 2020, 3:52 a.m. UTC
xfs_repair's interval based progress has been broken for
some time, create a test based on dmdelay to stretch out
the time and use ag_stride to force parallelism.

Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
---
Changes since v1:
- Use _scratch_xfs_repair
- Filter only repair output
- Make the filter more tolerant of whitespace and plurals
- Take golden output from 'xfs_repair: fix progress reporting'

 tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/516.out | 15 ++++++++++
 tests/xfs/group   |  1 +
 3 files changed, 92 insertions(+)
 create mode 100755 tests/xfs/516
 create mode 100644 tests/xfs/516.out

Comments

Darrick J. Wong May 20, 2020, 3:55 p.m. UTC | #1
On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
> xfs_repair's interval based progress has been broken for
> some time, create a test based on dmdelay to stretch out
> the time and use ag_stride to force parallelism.
> 
> Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
> ---
> Changes since v1:
> - Use _scratch_xfs_repair
> - Filter only repair output
> - Make the filter more tolerant of whitespace and plurals
> - Take golden output from 'xfs_repair: fix progress reporting'
> 
>  tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/516.out | 15 ++++++++++
>  tests/xfs/group   |  1 +
>  3 files changed, 92 insertions(+)
>  create mode 100755 tests/xfs/516
>  create mode 100644 tests/xfs/516.out
> 
> diff --git a/tests/xfs/516 b/tests/xfs/516
> new file mode 100755
> index 00000000..1c0508ef
> --- /dev/null
> +++ b/tests/xfs/516
> @@ -0,0 +1,76 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
> +#
> +# FS QA Test 516
> +#
> +# Test xfs_repair's progress reporting
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	_dmsetup_remove delay-test > /dev/null 2>&1
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmdelay
> +. ./common/populate
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs xfs
> +_supported_os Linux
> +_require_scratch
> +_require_dm_target delay
> +
> +# Filter output specific to the formatters in xfs_repair/progress.c
> +# Ideally we'd like to see hits on anything that matches
> +# awk '/{FMT/' repair/progress.c
> +_filter_repair()
> +{
> +	sed -ne '
> +	s/[0-9]\+/#/g;
> +	s/^\s\+/ /g;
> +	s/\(second\|minute\)s/\1/g
> +	/#:#:#:/p
> +	'
> +}
> +
> +echo "Format and populate"
> +_scratch_populate_cached nofill > $seqres.full 2>&1
> +
> +echo "Introduce a dmdelay"
> +_init_delay
> +
> +# Introduce a read I/O delay
> +# The default in common/dmdelay is a bit too agressive
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
> +_load_delay_table $DELAY_READ
> +
> +echo "Run repair"
> +SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
> +        tee -a $seqres.full > $seqres.xfs_repair.out
> +
> +cat $seqres.xfs_repair.out | _filter_repair | sort -u

I might've redirected this to $tmp.repair, but otherwise I think this
looks decent.

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D


> +
> +_cleanup_delay
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/516.out b/tests/xfs/516.out
> new file mode 100644
> index 00000000..85018b93
> --- /dev/null
> +++ b/tests/xfs/516.out
> @@ -0,0 +1,15 @@
> +QA output created by 516
> +Format and populate
> +Introduce a dmdelay
> +Run repair
> + - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> + - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
> + - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
> + - #:#:#: process known inodes and inode discovery - # of # inodes done
> + - #:#:#: process newly discovered inodes - # of # allocation groups done
> + - #:#:#: rebuild AG headers and trees - # of # allocation groups done
> + - #:#:#: scanning agi unlinked lists - # of # allocation groups done
> + - #:#:#: scanning filesystem freespace - # of # allocation groups done
> + - #:#:#: setting up duplicate extent list - # of # allocation groups done
> + - #:#:#: verify and correct link counts - # of # allocation groups done
> + - #:#:#: zeroing log - # of # blocks done
> diff --git a/tests/xfs/group b/tests/xfs/group
> index 12eb55c9..aeeca23f 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -513,3 +513,4 @@
>  513 auto mount
>  514 auto quick db
>  515 auto quick quota
> +516 repair
> -- 
> 2.18.4
>
Eryu Guan May 24, 2020, 4:46 p.m. UTC | #2
On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
> xfs_repair's interval based progress has been broken for
> some time, create a test based on dmdelay to stretch out
> the time and use ag_stride to force parallelism.
> 
> Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
> ---
> Changes since v1:
> - Use _scratch_xfs_repair
> - Filter only repair output
> - Make the filter more tolerant of whitespace and plurals
> - Take golden output from 'xfs_repair: fix progress reporting'

I saw failures like below, and I'm using v5.7-rc4 kernel and v5.4.0
xfsprogs, is this expected failure?

@@ -2,8 +2,6 @@
 Format and populate
 Introduce a dmdelay
 Run repair
- - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
- - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
  - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
  - #:#:#: process known inodes and inode discovery - # of # inodes done
  - #:#:#: process newly discovered inodes - # of # allocation groups done
@@ -12,4 +10,3 @@
  - #:#:#: scanning filesystem freespace - # of # allocation groups done
  - #:#:#: setting up duplicate extent list - # of # allocation groups done
  - #:#:#: verify and correct link counts - # of # allocation groups done
- - #:#:#: zeroing log - # of # blocks done

> 
>  tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/516.out | 15 ++++++++++
>  tests/xfs/group   |  1 +
>  3 files changed, 92 insertions(+)
>  create mode 100755 tests/xfs/516
>  create mode 100644 tests/xfs/516.out
> 
> diff --git a/tests/xfs/516 b/tests/xfs/516
> new file mode 100755
> index 00000000..1c0508ef
> --- /dev/null
> +++ b/tests/xfs/516
> @@ -0,0 +1,76 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
> +#
> +# FS QA Test 516
> +#
> +# Test xfs_repair's progress reporting
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{

	rm -f $tmp.*

As some common helpers would use $tmp. files as well.

> +	cd /
> +	_dmsetup_remove delay-test > /dev/null 2>&1

I think we could do _cleanup_delay here and discard the outputs.

> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmdelay
> +. ./common/populate
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs xfs
> +_supported_os Linux
> +_require_scratch
> +_require_dm_target delay
> +
> +# Filter output specific to the formatters in xfs_repair/progress.c
> +# Ideally we'd like to see hits on anything that matches
> +# awk '/{FMT/' repair/progress.c
> +_filter_repair()

Function names with the leading underscore are reserved for common
helpers, filter_repair would be fine.

> +{
> +	sed -ne '
> +	s/[0-9]\+/#/g;
> +	s/^\s\+/ /g;
> +	s/\(second\|minute\)s/\1/g
> +	/#:#:#:/p
> +	'
> +}
> +
> +echo "Format and populate"
> +_scratch_populate_cached nofill > $seqres.full 2>&1
> +
> +echo "Introduce a dmdelay"
> +_init_delay
> +
> +# Introduce a read I/O delay
> +# The default in common/dmdelay is a bit too agressive
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
> +_load_delay_table $DELAY_READ
> +
> +echo "Run repair"
> +SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
> +        tee -a $seqres.full > $seqres.xfs_repair.out
> +
> +cat $seqres.xfs_repair.out | _filter_repair | sort -u

I agreed with Darrick here. redirect output to $tmp.repair is better, as
we already cleanup $tmp.* in _cleanup, and no one is cleaning up
$seqres.xfs_repair.out file.

> +
> +_cleanup_delay

We could remove this one if do it in _cleanup.

> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/516.out b/tests/xfs/516.out
> new file mode 100644
> index 00000000..85018b93
> --- /dev/null
> +++ b/tests/xfs/516.out
> @@ -0,0 +1,15 @@
> +QA output created by 516
> +Format and populate
> +Introduce a dmdelay
> +Run repair
> + - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> + - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
> + - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
> + - #:#:#: process known inodes and inode discovery - # of # inodes done
> + - #:#:#: process newly discovered inodes - # of # allocation groups done
> + - #:#:#: rebuild AG headers and trees - # of # allocation groups done
> + - #:#:#: scanning agi unlinked lists - # of # allocation groups done
> + - #:#:#: scanning filesystem freespace - # of # allocation groups done
> + - #:#:#: setting up duplicate extent list - # of # allocation groups done
> + - #:#:#: verify and correct link counts - # of # allocation groups done
> + - #:#:#: zeroing log - # of # blocks done
> diff --git a/tests/xfs/group b/tests/xfs/group
> index 12eb55c9..aeeca23f 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -513,3 +513,4 @@
>  513 auto mount
>  514 auto quick db
>  515 auto quick quota
> +516 repair

Should be in auto group as well? Only tests in auto (and quick, which is
a sub-set of auto) will be run by default.

Thanks,
Eryu

> -- 
> 2.18.4
Darrick J. Wong May 24, 2020, 5:18 p.m. UTC | #3
On Mon, May 25, 2020 at 12:46:48AM +0800, Eryu Guan wrote:
> On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
> > xfs_repair's interval based progress has been broken for
> > some time, create a test based on dmdelay to stretch out
> > the time and use ag_stride to force parallelism.
> > 
> > Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
> > ---
> > Changes since v1:
> > - Use _scratch_xfs_repair
> > - Filter only repair output
> > - Make the filter more tolerant of whitespace and plurals
> > - Take golden output from 'xfs_repair: fix progress reporting'
> 
> I saw failures like below, and I'm using v5.7-rc4 kernel and v5.4.0
> xfsprogs, is this expected failure?
> 
> @@ -2,8 +2,6 @@
>  Format and populate
>  Introduce a dmdelay
>  Run repair
> - - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> - - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
>   - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
>   - #:#:#: process known inodes and inode discovery - # of # inodes done
>   - #:#:#: process newly discovered inodes - # of # allocation groups done
> @@ -12,4 +10,3 @@
>   - #:#:#: scanning filesystem freespace - # of # allocation groups done
>   - #:#:#: setting up duplicate extent list - # of # allocation groups done
>   - #:#:#: verify and correct link counts - # of # allocation groups done
> - - #:#:#: zeroing log - # of # blocks done

Yeah, there's an xfsprogs patch pending, IIRC.

--D

> 
> > 
> >  tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/516.out | 15 ++++++++++
> >  tests/xfs/group   |  1 +
> >  3 files changed, 92 insertions(+)
> >  create mode 100755 tests/xfs/516
> >  create mode 100644 tests/xfs/516.out
> > 
> > diff --git a/tests/xfs/516 b/tests/xfs/516
> > new file mode 100755
> > index 00000000..1c0508ef
> > --- /dev/null
> > +++ b/tests/xfs/516
> > @@ -0,0 +1,76 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
> > +#
> > +# FS QA Test 516
> > +#
> > +# Test xfs_repair's progress reporting
> > +#
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1	# failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> 
> 	rm -f $tmp.*
> 
> As some common helpers would use $tmp. files as well.
> 
> > +	cd /
> > +	_dmsetup_remove delay-test > /dev/null 2>&1
> 
> I think we could do _cleanup_delay here and discard the outputs.
> 
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +. ./common/dmdelay
> > +. ./common/populate
> > +
> > +# remove previous $seqres.full before test
> > +rm -f $seqres.full
> > +
> > +# real QA test starts here
> > +
> > +# Modify as appropriate.
> > +_supported_fs xfs
> > +_supported_os Linux
> > +_require_scratch
> > +_require_dm_target delay
> > +
> > +# Filter output specific to the formatters in xfs_repair/progress.c
> > +# Ideally we'd like to see hits on anything that matches
> > +# awk '/{FMT/' repair/progress.c
> > +_filter_repair()
> 
> Function names with the leading underscore are reserved for common
> helpers, filter_repair would be fine.
> 
> > +{
> > +	sed -ne '
> > +	s/[0-9]\+/#/g;
> > +	s/^\s\+/ /g;
> > +	s/\(second\|minute\)s/\1/g
> > +	/#:#:#:/p
> > +	'
> > +}
> > +
> > +echo "Format and populate"
> > +_scratch_populate_cached nofill > $seqres.full 2>&1
> > +
> > +echo "Introduce a dmdelay"
> > +_init_delay
> > +
> > +# Introduce a read I/O delay
> > +# The default in common/dmdelay is a bit too agressive
> > +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> > +DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
> > +_load_delay_table $DELAY_READ
> > +
> > +echo "Run repair"
> > +SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
> > +        tee -a $seqres.full > $seqres.xfs_repair.out
> > +
> > +cat $seqres.xfs_repair.out | _filter_repair | sort -u
> 
> I agreed with Darrick here. redirect output to $tmp.repair is better, as
> we already cleanup $tmp.* in _cleanup, and no one is cleaning up
> $seqres.xfs_repair.out file.
> 
> > +
> > +_cleanup_delay
> 
> We could remove this one if do it in _cleanup.
> 
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/xfs/516.out b/tests/xfs/516.out
> > new file mode 100644
> > index 00000000..85018b93
> > --- /dev/null
> > +++ b/tests/xfs/516.out
> > @@ -0,0 +1,15 @@
> > +QA output created by 516
> > +Format and populate
> > +Introduce a dmdelay
> > +Run repair
> > + - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> > + - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
> > + - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
> > + - #:#:#: process known inodes and inode discovery - # of # inodes done
> > + - #:#:#: process newly discovered inodes - # of # allocation groups done
> > + - #:#:#: rebuild AG headers and trees - # of # allocation groups done
> > + - #:#:#: scanning agi unlinked lists - # of # allocation groups done
> > + - #:#:#: scanning filesystem freespace - # of # allocation groups done
> > + - #:#:#: setting up duplicate extent list - # of # allocation groups done
> > + - #:#:#: verify and correct link counts - # of # allocation groups done
> > + - #:#:#: zeroing log - # of # blocks done
> > diff --git a/tests/xfs/group b/tests/xfs/group
> > index 12eb55c9..aeeca23f 100644
> > --- a/tests/xfs/group
> > +++ b/tests/xfs/group
> > @@ -513,3 +513,4 @@
> >  513 auto mount
> >  514 auto quick db
> >  515 auto quick quota
> > +516 repair
> 
> Should be in auto group as well? Only tests in auto (and quick, which is
> a sub-set of auto) will be run by default.
> 
> Thanks,
> Eryu
> 
> > -- 
> > 2.18.4
Zorro Lang May 29, 2020, 8:06 a.m. UTC | #4
On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
> xfs_repair's interval based progress has been broken for
> some time, create a test based on dmdelay to stretch out
> the time and use ag_stride to force parallelism.
> 
> Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
> ---
> Changes since v1:
> - Use _scratch_xfs_repair
> - Filter only repair output
> - Make the filter more tolerant of whitespace and plurals
> - Take golden output from 'xfs_repair: fix progress reporting'
> 
>  tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/516.out | 15 ++++++++++
>  tests/xfs/group   |  1 +
>  3 files changed, 92 insertions(+)
>  create mode 100755 tests/xfs/516
>  create mode 100644 tests/xfs/516.out
> 
> diff --git a/tests/xfs/516 b/tests/xfs/516
> new file mode 100755
> index 00000000..1c0508ef
> --- /dev/null
> +++ b/tests/xfs/516
> @@ -0,0 +1,76 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
> +#
> +# FS QA Test 516
> +#
> +# Test xfs_repair's progress reporting
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	_dmsetup_remove delay-test > /dev/null 2>&1

How about use the helper, avoid using the 'delay-test' name at here?
_cleanup_delay > /dev/null 2>&1

> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmdelay
> +. ./common/populate
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs xfs
> +_supported_os Linux
> +_require_scratch
> +_require_dm_target delay
> +
> +# Filter output specific to the formatters in xfs_repair/progress.c
> +# Ideally we'd like to see hits on anything that matches
> +# awk '/{FMT/' repair/progress.c
> +_filter_repair()
> +{
> +	sed -ne '
> +	s/[0-9]\+/#/g;
> +	s/^\s\+/ /g;
> +	s/\(second\|minute\)s/\1/g
> +	/#:#:#:/p
> +	'
> +}
> +
> +echo "Format and populate"
> +_scratch_populate_cached nofill > $seqres.full 2>&1
> +
> +echo "Introduce a dmdelay"
> +_init_delay
> +
> +# Introduce a read I/O delay
> +# The default in common/dmdelay is a bit too agressive
> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
> +DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
> +_load_delay_table $DELAY_READ
> +
> +echo "Run repair"
> +SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
> +        tee -a $seqres.full > $seqres.xfs_repair.out
> +
> +cat $seqres.xfs_repair.out | _filter_repair | sort -u
> +
> +_cleanup_delay
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/516.out b/tests/xfs/516.out
> new file mode 100644
> index 00000000..85018b93
> --- /dev/null
> +++ b/tests/xfs/516.out
> @@ -0,0 +1,15 @@
> +QA output created by 516
> +Format and populate
> +Introduce a dmdelay
> +Run repair
> + - #:#:#: Phase #: #% done - estimated remaining time # minute, # second

I just tested on latest upstream xfsprogs-dev for-next branch, it failed as:
--- /root/git/xfstests-dev/tests/xfs/516.out    2020-05-29 15:31:06.602440261 +0800
+++ /root/git/xfstests-dev/results//xfs/516.out.bad     2020-05-29 15:40:13.401777675 +0800
@@ -3,6 +3,7 @@
 Introduce a dmdelay
 Run repair
  - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
+ - #:#:#: Phase #: #% done - estimated remaining time # second
  - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
  - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
  - #:#:#: process known inodes and inode discovery - # of # inodes done


> + - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
> + - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
> + - #:#:#: process known inodes and inode discovery - # of # inodes done
> + - #:#:#: process newly discovered inodes - # of # allocation groups done
> + - #:#:#: rebuild AG headers and trees - # of # allocation groups done
> + - #:#:#: scanning agi unlinked lists - # of # allocation groups done
> + - #:#:#: scanning filesystem freespace - # of # allocation groups done
> + - #:#:#: setting up duplicate extent list - # of # allocation groups done
> + - #:#:#: verify and correct link counts - # of # allocation groups done
> + - #:#:#: zeroing log - # of # blocks done
> diff --git a/tests/xfs/group b/tests/xfs/group
> index 12eb55c9..aeeca23f 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -513,3 +513,4 @@
>  513 auto mount
>  514 auto quick db
>  515 auto quick quota
> +516 repair

Is there a reason why this case shouldn't be in auto group?

Thanks,
Zorro

> -- 
> 2.18.4
>
Donald Douwsma June 1, 2020, 12:53 a.m. UTC | #5
Hi Zorro,

On 29/05/2020 18:06, Zorro Lang wrote:
> On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
>> xfs_repair's interval based progress has been broken for
>> some time, create a test based on dmdelay to stretch out
>> the time and use ag_stride to force parallelism.
>>
>> Signed-off-by: Donald Douwsma <ddouwsma@redhat.com>
>> ---
>> Changes since v1:
>> - Use _scratch_xfs_repair
>> - Filter only repair output
>> - Make the filter more tolerant of whitespace and plurals
>> - Take golden output from 'xfs_repair: fix progress reporting'
>>
>>  tests/xfs/516     | 76 +++++++++++++++++++++++++++++++++++++++++++++++
>>  tests/xfs/516.out | 15 ++++++++++
>>  tests/xfs/group   |  1 +
>>  3 files changed, 92 insertions(+)
>>  create mode 100755 tests/xfs/516
>>  create mode 100644 tests/xfs/516.out
>>
>> diff --git a/tests/xfs/516 b/tests/xfs/516
>> new file mode 100755
>> index 00000000..1c0508ef
>> --- /dev/null
>> +++ b/tests/xfs/516
>> @@ -0,0 +1,76 @@
>> +#! /bin/bash
>> +# SPDX-License-Identifier: GPL-2.0
>> +# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
>> +#
>> +# FS QA Test 516
>> +#
>> +# Test xfs_repair's progress reporting
>> +#
>> +seq=`basename $0`
>> +seqres=$RESULT_DIR/$seq
>> +echo "QA output created by $seq"
>> +
>> +here=`pwd`
>> +tmp=/tmp/$$
>> +status=1	# failure is the default!
>> +trap "_cleanup; exit \$status" 0 1 2 3 15
>> +
>> +_cleanup()
>> +{
>> +	cd /
>> +	_dmsetup_remove delay-test > /dev/null 2>&1
> 
> How about use the helper, avoid using the 'delay-test' name at here?
> _cleanup_delay > /dev/null 2>&1
> 
>> +}
>> +
>> +# get standard environment, filters and checks
>> +. ./common/rc
>> +. ./common/filter
>> +. ./common/dmdelay
>> +. ./common/populate
>> +
>> +# remove previous $seqres.full before test
>> +rm -f $seqres.full
>> +
>> +# real QA test starts here
>> +
>> +# Modify as appropriate.
>> +_supported_fs xfs
>> +_supported_os Linux
>> +_require_scratch
>> +_require_dm_target delay
>> +
>> +# Filter output specific to the formatters in xfs_repair/progress.c
>> +# Ideally we'd like to see hits on anything that matches
>> +# awk '/{FMT/' repair/progress.c
>> +_filter_repair()
>> +{
>> +	sed -ne '
>> +	s/[0-9]\+/#/g;
>> +	s/^\s\+/ /g;
>> +	s/\(second\|minute\)s/\1/g
>> +	/#:#:#:/p
>> +	'
>> +}
>> +
>> +echo "Format and populate"
>> +_scratch_populate_cached nofill > $seqres.full 2>&1
>> +
>> +echo "Introduce a dmdelay"
>> +_init_delay
>> +
>> +# Introduce a read I/O delay
>> +# The default in common/dmdelay is a bit too agressive
>> +BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
>> +DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
>> +_load_delay_table $DELAY_READ
>> +
>> +echo "Run repair"
>> +SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
>> +        tee -a $seqres.full > $seqres.xfs_repair.out
>> +
>> +cat $seqres.xfs_repair.out | _filter_repair | sort -u
>> +
>> +_cleanup_delay
>> +
>> +# success, all done
>> +status=0
>> +exit
>> diff --git a/tests/xfs/516.out b/tests/xfs/516.out
>> new file mode 100644
>> index 00000000..85018b93
>> --- /dev/null
>> +++ b/tests/xfs/516.out
>> @@ -0,0 +1,15 @@
>> +QA output created by 516
>> +Format and populate
>> +Introduce a dmdelay
>> +Run repair
>> + - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> 
> I just tested on latest upstream xfsprogs-dev for-next branch, it failed as:
> --- /root/git/xfstests-dev/tests/xfs/516.out    2020-05-29 15:31:06.602440261 +0800
> +++ /root/git/xfstests-dev/results//xfs/516.out.bad     2020-05-29 15:40:13.401777675 +0800
> @@ -3,6 +3,7 @@
>  Introduce a dmdelay
>  Run repair
>   - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
> + - #:#:#: Phase #: #% done - estimated remaining time # second
>   - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
>   - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
>   - #:#:#: process known inodes and inode discovery - # of # inodes done
> 
> 
>> + - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
>> + - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
>> + - #:#:#: process known inodes and inode discovery - # of # inodes done
>> + - #:#:#: process newly discovered inodes - # of # allocation groups done
>> + - #:#:#: rebuild AG headers and trees - # of # allocation groups done
>> + - #:#:#: scanning agi unlinked lists - # of # allocation groups done
>> + - #:#:#: scanning filesystem freespace - # of # allocation groups done
>> + - #:#:#: setting up duplicate extent list - # of # allocation groups done
>> + - #:#:#: verify and correct link counts - # of # allocation groups done
>> + - #:#:#: zeroing log - # of # blocks done
>> diff --git a/tests/xfs/group b/tests/xfs/group
>> index 12eb55c9..aeeca23f 100644
>> --- a/tests/xfs/group
>> +++ b/tests/xfs/group
>> @@ -513,3 +513,4 @@
>>  513 auto mount
>>  514 auto quick db
>>  515 auto quick quota
>> +516 repair
> 
> Is there a reason why this case shouldn't be in auto group?
> 
> Thanks,
> Zorro


We could work to wards getting it into auto, I wanted to make sure it
was working ok first.

It takes about 2.5 min to run with the current image used by
_scratch_populate_cached, by its nature it needs time for the progress
code to fire, but that may be ok.

It sometimes leaves the delay-test active, I think because I've I used
_dmsetup_remove in _cleanup instead of _cleanup_delay because the later
unmounts the filesystem, which this test doesnt do, but I'd have to look
into this more so it plays well with other tests like the original
dmdelay unmount test 311.

I wasn't completely happy with the filter, it only checks that any of the
progress messages are printing at least once, which for most can still
just match on the end of phase printing, which always worked. Ideally it
would check that some of these messages print multiple times.

I can work on a V3 if this hasn't merged yet, or a follow up after, thoughts?

Cheers, 
Don
Donald Douwsma June 1, 2020, 3:55 a.m. UTC | #6
On 01/06/2020 10:53, Donald Douwsma wrote:
> Hi Zorro,
> 
> On 29/05/2020 18:06, Zorro Lang wrote:
>> On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:

<snip>

>>> --- a/tests/xfs/group
>>> +++ b/tests/xfs/group
>>> @@ -513,3 +513,4 @@
>>>  513 auto mount
>>>  514 auto quick db
>>>  515 auto quick quota
>>> +516 repair
>>
>> Is there a reason why this case shouldn't be in auto group?
>>
>> Thanks,
>> Zorro
> 
> 
> We could work to wards getting it into auto, I wanted to make sure it
> was working ok first.
> 
> It takes about 2.5 min to run with the current image used by
> _scratch_populate_cached, by its nature it needs time for the progress
> code to fire, but that may be ok.
> 
> It sometimes leaves the delay-test active, I think because I've I used
> _dmsetup_remove in _cleanup instead of _cleanup_delay because the later
> unmounts the filesystem, which this test doesnt do, but I'd have to look
> into this more so it plays well with other tests like the original
> dmdelay unmount test 311.

Actually it does clean up delay-test correctly (*cough* I may have been
backgrounding xfs_repair in my xfstests tree while testing something
else).  I have seen it leave delay-test around if terminated with 
ctrl+c, but that seems reasonable if a test is aborted. 

> I wasn't completely happy with the filter, it only checks that any of the
> progress messages are printing at least once, which for most can still
> just match on the end of phase printing, which always worked. Ideally it
> would check that some of these messages print multiple times.
> 
> I can work on a V3 if this hasn't merged yet, or a follow up after, thoughts?
>
Zorro Lang June 11, 2020, 5:03 a.m. UTC | #7
On Mon, Jun 01, 2020 at 01:55:49PM +1000, Donald Douwsma wrote:
> 
> 
> On 01/06/2020 10:53, Donald Douwsma wrote:
> > Hi Zorro,
> > 
> > On 29/05/2020 18:06, Zorro Lang wrote:
> >> On Wed, May 20, 2020 at 01:52:58PM +1000, Donald Douwsma wrote:
> 
> <snip>
> 
> >>> --- a/tests/xfs/group
> >>> +++ b/tests/xfs/group
> >>> @@ -513,3 +513,4 @@
> >>>  513 auto mount
> >>>  514 auto quick db
> >>>  515 auto quick quota
> >>> +516 repair
> >>
> >> Is there a reason why this case shouldn't be in auto group?
> >>
> >> Thanks,
> >> Zorro
> > 
> > 
> > We could work to wards getting it into auto, I wanted to make sure it
> > was working ok first.

I just rechecked the mail list, sorry I missed this email long time (CC me will
make sure I won't miss it next time:)

I think several minutes running time is acceptable to be into auto group, if the
case is stable enough, won't fail unexpected.

> > 
> > It takes about 2.5 min to run with the current image used by
> > _scratch_populate_cached, by its nature it needs time for the progress
> > code to fire, but that may be ok.
> > 
> > It sometimes leaves the delay-test active, I think because I've I used
> > _dmsetup_remove in _cleanup instead of _cleanup_delay because the later
> > unmounts the filesystem, which this test doesnt do, but I'd have to look
> > into this more so it plays well with other tests like the original
> > dmdelay unmount test 311.
> 
> Actually it does clean up delay-test correctly (*cough* I may have been
> backgrounding xfs_repair in my xfstests tree while testing something
> else).  I have seen it leave delay-test around if terminated with 
> ctrl+c, but that seems reasonable if a test is aborted. 

If use a common helper to DO a test, I'd like to use its corresponding
helper to cleanup UNDO it. If there's still something wrong, we can fix the
helpers.

> 
> > I wasn't completely happy with the filter, it only checks that any of the
> > progress messages are printing at least once, which for most can still
> > just match on the end of phase printing, which always worked. Ideally it
> > would check that some of these messages print multiple times.
> > 
> > I can work on a V3 if this hasn't merged yet, or a follow up after, thoughts?

Sure, hope the V3 can improve the output mismatch issue, although the filter is
really boring:)

Thanks,
Zorro

> > 
> 
>

Patch
diff mbox series

diff --git a/tests/xfs/516 b/tests/xfs/516
new file mode 100755
index 00000000..1c0508ef
--- /dev/null
+++ b/tests/xfs/516
@@ -0,0 +1,76 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 Red Hat, Inc.  All Rights Reserved.
+#
+# FS QA Test 516
+#
+# Test xfs_repair's progress reporting
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	_dmsetup_remove delay-test > /dev/null 2>&1
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmdelay
+. ./common/populate
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+# Modify as appropriate.
+_supported_fs xfs
+_supported_os Linux
+_require_scratch
+_require_dm_target delay
+
+# Filter output specific to the formatters in xfs_repair/progress.c
+# Ideally we'd like to see hits on anything that matches
+# awk '/{FMT/' repair/progress.c
+_filter_repair()
+{
+	sed -ne '
+	s/[0-9]\+/#/g;
+	s/^\s\+/ /g;
+	s/\(second\|minute\)s/\1/g
+	/#:#:#:/p
+	'
+}
+
+echo "Format and populate"
+_scratch_populate_cached nofill > $seqres.full 2>&1
+
+echo "Introduce a dmdelay"
+_init_delay
+
+# Introduce a read I/O delay
+# The default in common/dmdelay is a bit too agressive
+BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
+DELAY_TABLE_RDELAY="0 $BLK_DEV_SIZE delay $SCRATCH_DEV 0 100 $SCRATCH_DEV 0 0"
+_load_delay_table $DELAY_READ
+
+echo "Run repair"
+SCRATCH_DEV=$DELAY_DEV _scratch_xfs_repair -o ag_stride=4 -t 1 2>&1 |
+        tee -a $seqres.full > $seqres.xfs_repair.out
+
+cat $seqres.xfs_repair.out | _filter_repair | sort -u
+
+_cleanup_delay
+
+# success, all done
+status=0
+exit
diff --git a/tests/xfs/516.out b/tests/xfs/516.out
new file mode 100644
index 00000000..85018b93
--- /dev/null
+++ b/tests/xfs/516.out
@@ -0,0 +1,15 @@ 
+QA output created by 516
+Format and populate
+Introduce a dmdelay
+Run repair
+ - #:#:#: Phase #: #% done - estimated remaining time # minute, # second
+ - #:#:#: Phase #: elapsed time # second - processed # inodes per minute
+ - #:#:#: check for inodes claiming duplicate blocks - # of # inodes done
+ - #:#:#: process known inodes and inode discovery - # of # inodes done
+ - #:#:#: process newly discovered inodes - # of # allocation groups done
+ - #:#:#: rebuild AG headers and trees - # of # allocation groups done
+ - #:#:#: scanning agi unlinked lists - # of # allocation groups done
+ - #:#:#: scanning filesystem freespace - # of # allocation groups done
+ - #:#:#: setting up duplicate extent list - # of # allocation groups done
+ - #:#:#: verify and correct link counts - # of # allocation groups done
+ - #:#:#: zeroing log - # of # blocks done
diff --git a/tests/xfs/group b/tests/xfs/group
index 12eb55c9..aeeca23f 100644
--- a/tests/xfs/group
+++ b/tests/xfs/group
@@ -513,3 +513,4 @@ 
 513 auto mount
 514 auto quick db
 515 auto quick quota
+516 repair