diff mbox

generic/409: reflink concurrent operations

Message ID 1487853054-3194-1-git-send-email-nave.vardy@plexistor.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nave Vardy Feb. 23, 2017, 12:30 p.m. UTC
reflink: concurrent operations test

perform read operation on the target file while
doing write or fpunch operations on the reflinks.

Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
---
 tests/generic/409     | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/409.out |  2 ++
 tests/generic/group   |  1 +
 3 files changed, 100 insertions(+)
 create mode 100644 tests/generic/409
 create mode 100644 tests/generic/409.out

Comments

Darrick J. Wong Feb. 24, 2017, 3:38 a.m. UTC | #1
On Thu, Feb 23, 2017 at 02:30:54PM +0200, Nave Vardy wrote:
> reflink: concurrent operations test
> 
> perform read operation on the target file while
> doing write or fpunch operations on the reflinks.

A read vs. (write|fpunch) race... is this a regression test for a
specific problem you found?

> Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
> ---
>  tests/generic/409     | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/409.out |  2 ++
>  tests/generic/group   |  1 +
>  3 files changed, 100 insertions(+)
>  create mode 100644 tests/generic/409
>  create mode 100644 tests/generic/409.out
> 
> diff --git a/tests/generic/409 b/tests/generic/409
> new file mode 100644
> index 0000000..eb1da13
> --- /dev/null
> +++ b/tests/generic/409
> @@ -0,0 +1,97 @@
> +#!/bin/bash
> +# FS QA Test No. 409
> +#
> +# test for races between write or fpunch operations on reflinked files
> +# to read operations on the targed file
> +#
> +#-----------------------------------------------------------------------
> +#
> +# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#
> +#-----------------------------------------------------------------------
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=0    # success is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/reflink
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_scratch_reflink
> +_require_cp_reflink
> +
> +_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 || _fail "mkfs failed"

Any particular reason for formatting a 400MB filesystem?

_require_fs_space is customary for "skip test if we don't have this much
free space"

> +_scratch_mount || _fail "mount failed"
> +
> +echo "Silence is golden"

Until something fails, and now you have to go figure out which part of
the test script has the failed command.

It's much easier to diagnose test failures if the test case occasionally
echoes a section heading into the output file so that any error output
can be pinpointed to within a few lines.

> +
> +workfile=${SCRATCH_MNT}/workfile
> +light_clone=${SCRATCH_MNT}/light_clone
> +
> +file_size=$((10 * 1024 * 1024))
> +bs=4096

Um, what if the block size isn't 4k?  Does this test case work on a
filesystem with 64k blocks?  Or weird things like ocfs2 that map
clusters instead of blocks?

_get_file_block_size perhaps?

> +block_num=$((file_size / bs))
> +reflinks_num=20
> +
> +$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
> +
> +#create all reflinkfs

"create all reflinks" ?

> +for ((i=1; i<reflinks_num; i++));
> +do
> +	cp --reflink $workfile ${light_clone}_$i

_cp_reflink?

> +done
> +
> +#for each block simultaneously write to all reflinks
> +for ((block_index=0; block_index<block_num; block_index++));
> +do
> +	for ((i=0; i<reflinks_num; i++));
> +	do
> +		if [ $i -eq 0 ]; then
> +			$XFS_IO_PROG -c "pread $((block_index * bs)) $bs" \
> +						$workfile >> $seqres.full &
> +		elif [ $((block_index % 2)) -eq 0 ]; then
> +			$XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \
> +					${light_clone}_$i  >> $seqres.full &
> +		else
> +			$XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
> +					${light_clone}_$i >> $seqres.full &

So... is this test only concerned with pread/pwrite/fpunch returning
some kind of error code or crashing the kernel?  Should we check that
the read data is the old shared data, the newly written data, or zeroes?

--D

> +		fi
> +	done
> +	# wait for all threads to join before moving to next index
> +	wait
> +done
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/409.out b/tests/generic/409.out
> new file mode 100644
> index 0000000..6f11537
> --- /dev/null
> +++ b/tests/generic/409.out
> @@ -0,0 +1,2 @@
> +QA output created by 409
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index d14f221..27ff229 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -411,3 +411,4 @@
>  406 auto quick dangerous
>  407 auto quick clone metadata
>  408 auto quick clone dedupe metadata
> +409 auto quick clone
> -- 
> 2.5.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eryu Guan Feb. 24, 2017, 9:47 a.m. UTC | #2
On Thu, Feb 23, 2017 at 02:30:54PM +0200, Nave Vardy wrote:
> reflink: concurrent operations test
> 
> perform read operation on the target file while
> doing write or fpunch operations on the reflinks.

I'm having the same question as Darrick's, is this test reproducing some
kind of bugs for you?

> 
> Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
> ---
>  tests/generic/409     | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/409.out |  2 ++
>  tests/generic/group   |  1 +
>  3 files changed, 100 insertions(+)
>  create mode 100644 tests/generic/409

0755 file mode for new test. This is done automatically if you use
"./new generic" to setup new test template.

>  create mode 100644 tests/generic/409.out
> 
> diff --git a/tests/generic/409 b/tests/generic/409
> new file mode 100644
> index 0000000..eb1da13
> --- /dev/null
> +++ b/tests/generic/409
> @@ -0,0 +1,97 @@
> +#!/bin/bash
> +# FS QA Test No. 409
> +#
> +# test for races between write or fpunch operations on reflinked files
> +# to read operations on the targed file
> +#
> +#-----------------------------------------------------------------------
> +#
> +# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#
> +#-----------------------------------------------------------------------
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=0    # success is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/reflink
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_scratch_reflink
> +_require_cp_reflink
> +
> +_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 || _fail "mkfs failed"

Same question on the filesystem size.

> +_scratch_mount || _fail "mount failed"
> +
> +echo "Silence is golden"
> +
> +workfile=${SCRATCH_MNT}/workfile
> +light_clone=${SCRATCH_MNT}/light_clone
> +
> +file_size=$((10 * 1024 * 1024))
> +bs=4096
> +block_num=$((file_size / bs))
> +reflinks_num=20
> +
> +$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
> +
> +#create all reflinkfs
> +for ((i=1; i<reflinks_num; i++));
> +do
> +	cp --reflink $workfile ${light_clone}_$i
> +done
> +
> +#for each block simultaneously write to all reflinks
> +for ((block_index=0; block_index<block_num; block_index++));
> +do
> +	for ((i=0; i<reflinks_num; i++));
> +	do
> +		if [ $i -eq 0 ]; then
> +			$XFS_IO_PROG -c "pread $((block_index * bs)) $bs" \
> +						$workfile >> $seqres.full &
> +		elif [ $((block_index % 2)) -eq 0 ]; then
> +			$XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \
> +					${light_clone}_$i  >> $seqres.full &
> +		else
> +			$XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
> +					${light_clone}_$i >> $seqres.full &
> +		fi

I don't see much concurrency in this loop, for each block_index there's
only one pread process and reflinks_num processes doing pwrite or
fpunch. Seems only pwrites are racing with pwrites and fpunchs are
racing with fpunchs. How about:

for ((block_index=0; block_index<block_num; block_index++)); do
	for ((i=0; i<reflinks_num; i++)); do
		$XFS_IO_PROG -c "pread ..." ... &
		if [ $((i % 2)) -eq 0 ]; then
			$XFS_IO_PROG -c "pwrite ..." ... &
		else
			$XFS_IO_PROG -c "fpunch ..." ... &
		fi
	done
	wait
done

So for every pwrite or fpunch there's a pread racing with it. But
because I don't know if you're reproducing some specific bug with this
loop, I'm not sure if my version still works.

> +	done
> +	# wait for all threads to join before moving to next index
> +	wait
> +done
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/409.out b/tests/generic/409.out
> new file mode 100644
> index 0000000..6f11537
> --- /dev/null
> +++ b/tests/generic/409.out
> @@ -0,0 +1,2 @@
> +QA output created by 409
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index d14f221..27ff229 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -411,3 +411,4 @@
>  406 auto quick dangerous
>  407 auto quick clone metadata
>  408 auto quick clone dedupe metadata
> +409 auto quick clone

If you decide to increase the concurrency/workload, probably the test
will not be quick enough to fit in quick group :)

Thanks,
Eryu

> -- 
> 2.5.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nave Vardy Feb. 27, 2017, 2:26 p.m. UTC | #3
On Fri, 2017-02-24 at 17:47 +0800, Eryu Guan wrote:
> On Thu, Feb 23, 2017 at 02:30:54PM +0200, Nave Vardy wrote:
> > 
> > reflink: concurrent operations test
> > 
> > perform read operation on the target file while
> > doing write or fpunch operations on the reflinks.
> I'm having the same question as Darrick's, is this test reproducing
> some
> kind of bugs for you?
Maybe it would be better if I'll first explain my motivation for
sending the patch. I am working at Plexistor a company that develops a
new file-system for persistent memory. The file-system supports cow,
and we wrote some fstests, we thought to contribute for the community.
For the test itself, it helped us to found a bug in the development of
cow, no other generic test currently exists did.
> 
> > 
> > 
> > Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
> > ---
> >  tests/generic/409     | 97
> > +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/generic/409.out |  2 ++
> >  tests/generic/group   |  1 +
> >  3 files changed, 100 insertions(+)
> >  create mode 100644 tests/generic/409
> 0755 file mode for new test. This is done automatically if you use
> "./new generic" to setup new test template.
> 
In the next version of the patch I'll use this.
> > 
> >  create mode 100644 tests/generic/409.out
> > 
> > diff --git a/tests/generic/409 b/tests/generic/409
> > new file mode 100644
> > index 0000000..eb1da13
> > --- /dev/null
> > +++ b/tests/generic/409
> > @@ -0,0 +1,97 @@
> > +#!/bin/bash
> > +# FS QA Test No. 409
> > +#
> > +# test for races between write or fpunch operations on reflinked
> > files
> > +# to read operations on the targed file
> > +#
> > +#-----------------------------------------------------------------
> > ------
> > +#
> > +# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundation.
> > +#
> > +# This program is distributed in the hope that it would be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public
> > License
> > +# along with this program; if not, write the Free Software
> > Foundation,
> > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > +#
> > +#-----------------------------------------------------------------
> > ------
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=0    # success is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/reflink
> > +
> > +# remove previous $seqres.full before test
> > +rm -f $seqres.full
> > +
> > +# real QA test starts here
> > +_supported_fs generic
> > +_supported_os Linux
> > +_require_scratch_reflink
> > +_require_cp_reflink
> > +
> > +_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 ||
> > _fail "mkfs failed"
> Same question on the filesystem size.
> 
I used _scratch_mkfs_sized to avoid enospace in this scenario. 
I took a lager margin than I needed, which I can narrow down.
The targted file is 10mb and I used 20 reflinks, because the tests
writes the reflinks, in the end each will take place of max 10mb (it is
not accurate because I use fpunch for every second block), so we have
21 files sum to max size of 210mb, so I think 250mb will be good
enough.
> > 
> > +_scratch_mount || _fail "mount failed"
> > +
> > +echo "Silence is golden"
> > +
> > +workfile=${SCRATCH_MNT}/workfile
> > +light_clone=${SCRATCH_MNT}/light_clone
> > +
> > +file_size=$((10 * 1024 * 1024))
> > +bs=4096
I used 4096 because this is our file-system copy-on-write granularity,
this is in order to get maximum effect. I'll be able to use
_get_file_block_size().
> > +block_num=$((file_size / bs))
> > +reflinks_num=20
> > +
> > +$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
> > +
> > +#create all reflinkfs
> > +for ((i=1; i<reflinks_num; i++));
> > +do
> > +	cp --reflink $workfile ${light_clone}_$i
I'll change this to use _cp_reflink()
> > +done
> > +
> > +#for each block simultaneously write to all reflinks
> > +for ((block_index=0; block_index<block_num; block_index++));
> > +do
> > +	for ((i=0; i<reflinks_num; i++));
> > +	do
> > +		if [ $i -eq 0 ]; then
> > +			$XFS_IO_PROG -c "pread $((block_index *
> > bs)) $bs" \
> > +						$workfile >>
> > $seqres.full &
> > +		elif [ $((block_index % 2)) -eq 0 ]; then
> > +			$XFS_IO_PROG -c "pwrite $((block_index *
> > bs)) $bs" \
> > +					${light_clone}_$i  >>
> > $seqres.full &
> > +		else
> > +			$XFS_IO_PROG -c "fpunch $((block_index *
> > bs)) $bs" \
> > +					${light_clone}_$i >>
> > $seqres.full &
> > +		fi
> I don't see much concurrency in this loop, for each block_index
> there's
> only one pread process and reflinks_num processes doing pwrite or
> fpunch. Seems only pwrites are racing with pwrites and fpunchs are
> racing with fpunchs. How about:
> 
> for ((block_index=0; block_index<block_num; block_index++)); do
> 	for ((i=0; i<reflinks_num; i++)); do
> 		$XFS_IO_PROG -c "pread ..." ... &
> 		if [ $((i % 2)) -eq 0 ]; then
> 			$XFS_IO_PROG -c "pwrite ..." ... &
> 		else
> 			$XFS_IO_PROG -c "fpunch ..." ... &
> 		fi
> 	done
> 	wait
> done
> 
> So for every pwrite or fpunch there's a pread racing with it. But
> because I don't know if you're reproducing some specific bug with
> this
> loop, I'm not sure if my version still works.
> 
I understand your comment and your suggestion, we interntionally didn't
won't to modify the workfile so we just read it and race the pread with
the pwrites of the reflinks (or the fpunchs). we wanted that all
reflinks simultaneously will perform pwrite and in the next iteration
fpunch (and not half reflinks pwrite and the other half fpunch - which
is fine but describes another scenario). I think we can combine your
suggestion with my version, to get more concurrency like this:

for ((block_index=0; block_index<block_num; block_index++)); do
     for ((i=0; i<reflinks_num; i++)); do
          $XFS_IO_PROG -c "pread $((block_index * 	bs)) $bs" \
					$workfile >> $seqres.full &
          if [ $i -ne 0 ]; then
              if [ $((block_index % 2)) -eq 0 ]; then
		  $XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \        
                            ${light_clone}_$i >> $seqres.full &
              else
                  $XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
                                    ${light_clone}_$i >> $seqres.full &
              fi
           fi
       wait
       done
done		 
> > +	wait
> > +done
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/generic/409.out b/tests/generic/409.out
> > new file mode 100644
> > index 0000000..6f11537
> > --- /dev/null
> > +++ b/tests/generic/409.out
> > @@ -0,0 +1,2 @@
> > +QA output created by 409
> > +Silence is golden
> > diff --git a/tests/generic/group b/tests/generic/group
> > index d14f221..27ff229 100644
> > --- a/tests/generic/group
> > +++ b/tests/generic/group
> > @@ -411,3 +411,4 @@
> >  406 auto quick dangerous
> >  407 auto quick clone metadata
> >  408 auto quick clone dedupe metadata
> > +409 auto quick clone
> If you decide to increase the concurrency/workload, probably the test
> will not be quick enough to fit in quick group :)
> 
It is not too important if it would only be in clone and auto
> Thanks,
> Eryu
> 
> > 
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eryu Guan Feb. 28, 2017, 2:49 a.m. UTC | #4
On Mon, Feb 27, 2017 at 04:26:23PM +0200, nave vardy wrote:
> On Fri, 2017-02-24 at 17:47 +0800, Eryu Guan wrote:
> > On Thu, Feb 23, 2017 at 02:30:54PM +0200, Nave Vardy wrote:
> > > 
> > > reflink: concurrent operations test
> > > 
> > > perform read operation on the target file while
> > > doing write or fpunch operations on the reflinks.
> > I'm having the same question as Darrick's, is this test reproducing
> > some
> > kind of bugs for you?
> Maybe it would be better if I'll first explain my motivation for
> sending the patch. I am working at Plexistor a company that develops a
> new file-system for persistent memory. The file-system supports cow,
> and we wrote some fstests, we thought to contribute for the community.
> For the test itself, it helped us to found a bug in the development of
> cow, no other generic test currently exists did.

Thanks a lot for writing new tests!

> > 
> > > 
> > > 
> > > Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
> > > ---
> > >  tests/generic/409     | 97
> > > +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  tests/generic/409.out |  2 ++
> > >  tests/generic/group   |  1 +
> > >  3 files changed, 100 insertions(+)
> > >  create mode 100644 tests/generic/409
> > 0755 file mode for new test. This is done automatically if you use
> > "./new generic" to setup new test template.
> > 
> In the next version of the patch I'll use this.
> > > 
> > >  create mode 100644 tests/generic/409.out
> > > 
> > > diff --git a/tests/generic/409 b/tests/generic/409
> > > new file mode 100644
> > > index 0000000..eb1da13
> > > --- /dev/null
> > > +++ b/tests/generic/409
> > > @@ -0,0 +1,97 @@
> > > +#!/bin/bash
> > > +# FS QA Test No. 409
> > > +#
> > > +# test for races between write or fpunch operations on reflinked
> > > files
> > > +# to read operations on the targed file
> > > +#
> > > +#-----------------------------------------------------------------
> > > ------
> > > +#
> > > +# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
> > > +#
> > > +# This program is free software; you can redistribute it and/or
> > > +# modify it under the terms of the GNU General Public License as
> > > +# published by the Free Software Foundation.
> > > +#
> > > +# This program is distributed in the hope that it would be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +#
> > > +# You should have received a copy of the GNU General Public
> > > License
> > > +# along with this program; if not, write the Free Software
> > > Foundation,
> > > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > > +#
> > > +#-----------------------------------------------------------------
> > > ------
> > > +
> > > +seq=`basename $0`
> > > +seqres=$RESULT_DIR/$seq
> > > +echo "QA output created by $seq"
> > > +
> > > +here=`pwd`
> > > +tmp=/tmp/$$
> > > +status=0    # success is the default!
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +_cleanup()
> > > +{
> > > +	cd /
> > > +	rm -f $tmp.*
> > > +}
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common/rc
> > > +. ./common/reflink
> > > +
> > > +# remove previous $seqres.full before test
> > > +rm -f $seqres.full
> > > +
> > > +# real QA test starts here
> > > +_supported_fs generic
> > > +_supported_os Linux
> > > +_require_scratch_reflink
> > > +_require_cp_reflink
> > > +
> > > +_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 ||
> > > _fail "mkfs failed"
> > Same question on the filesystem size.
> > 
> I used _scratch_mkfs_sized to avoid enospace in this scenario. 
> I took a lager margin than I needed, which I can narrow down.
> The targted file is 10mb and I used 20 reflinks, because the tests
> writes the reflinks, in the end each will take place of max 10mb (it is
> not accurate because I use fpunch for every second block), so we have
> 21 files sum to max size of 210mb, so I think 250mb will be good
> enough.

Then I think, as Darrick suggested, _require_fs_space() is what you need.

> > > 
> > > +_scratch_mount || _fail "mount failed"
> > > +
> > > +echo "Silence is golden"
> > > +
> > > +workfile=${SCRATCH_MNT}/workfile
> > > +light_clone=${SCRATCH_MNT}/light_clone
> > > +
> > > +file_size=$((10 * 1024 * 1024))
> > > +bs=4096
> I used 4096 because this is our file-system copy-on-write granularity,
> this is in order to get maximum effect. I'll be able to use
> _get_file_block_size().
> > > +block_num=$((file_size / bs))
> > > +reflinks_num=20
> > > +
> > > +$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
> > > +
> > > +#create all reflinkfs
> > > +for ((i=1; i<reflinks_num; i++));
> > > +do
> > > +	cp --reflink $workfile ${light_clone}_$i
> I'll change this to use _cp_reflink()
> > > +done
> > > +
> > > +#for each block simultaneously write to all reflinks
> > > +for ((block_index=0; block_index<block_num; block_index++));
> > > +do
> > > +	for ((i=0; i<reflinks_num; i++));
> > > +	do
> > > +		if [ $i -eq 0 ]; then
> > > +			$XFS_IO_PROG -c "pread $((block_index *
> > > bs)) $bs" \
> > > +						$workfile >>
> > > $seqres.full &
> > > +		elif [ $((block_index % 2)) -eq 0 ]; then
> > > +			$XFS_IO_PROG -c "pwrite $((block_index *
> > > bs)) $bs" \
> > > +					${light_clone}_$i  >>
> > > $seqres.full &
> > > +		else
> > > +			$XFS_IO_PROG -c "fpunch $((block_index *
> > > bs)) $bs" \
> > > +					${light_clone}_$i >>
> > > $seqres.full &
> > > +		fi
> > I don't see much concurrency in this loop, for each block_index
> > there's
> > only one pread process and reflinks_num processes doing pwrite or
> > fpunch. Seems only pwrites are racing with pwrites and fpunchs are
> > racing with fpunchs. How about:
> > 
> > for ((block_index=0; block_index<block_num; block_index++)); do
> > 	for ((i=0; i<reflinks_num; i++)); do
> > 		$XFS_IO_PROG -c "pread ..." ... &
> > 		if [ $((i % 2)) -eq 0 ]; then
> > 			$XFS_IO_PROG -c "pwrite ..." ... &
> > 		else
> > 			$XFS_IO_PROG -c "fpunch ..." ... &
> > 		fi
> > 	done
> > 	wait
> > done
> > 
> > So for every pwrite or fpunch there's a pread racing with it. But
> > because I don't know if you're reproducing some specific bug with
> > this
> > loop, I'm not sure if my version still works.
> > 
> I understand your comment and your suggestion, we interntionally didn't
> won't to modify the workfile so we just read it and race the pread with
> the pwrites of the reflinks (or the fpunchs). we wanted that all
> reflinks simultaneously will perform pwrite and in the next iteration
> fpunch (and not half reflinks pwrite and the other half fpunch - which
> is fine but describes another scenario). I think we can combine your
> suggestion with my version, to get more concurrency like this:

Looks reasonable, please add some comments to describe this workload.

> 
> for ((block_index=0; block_index<block_num; block_index++)); do
>      for ((i=0; i<reflinks_num; i++)); do

Perhaps we can start with i=1 then remove the "$i -ne 0" check?

Thanks,
Eryu

>           $XFS_IO_PROG -c "pread $((block_index * 	bs)) $bs" \
> 					$workfile >> $seqres.full &
>           if [ $i -ne 0 ]; then
>               if [ $((block_index % 2)) -eq 0 ]; then
> 		  $XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \        
>                             ${light_clone}_$i >> $seqres.full &
>               else
>                   $XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
>                                     ${light_clone}_$i >> $seqres.full &
>               fi
>            fi
>        wait
>        done
> done		 
> > > +	wait
> > > +done
> > > +
> > > +# success, all done
> > > +status=0
> > > +exit
> > > diff --git a/tests/generic/409.out b/tests/generic/409.out
> > > new file mode 100644
> > > index 0000000..6f11537
> > > --- /dev/null
> > > +++ b/tests/generic/409.out
> > > @@ -0,0 +1,2 @@
> > > +QA output created by 409
> > > +Silence is golden
> > > diff --git a/tests/generic/group b/tests/generic/group
> > > index d14f221..27ff229 100644
> > > --- a/tests/generic/group
> > > +++ b/tests/generic/group
> > > @@ -411,3 +411,4 @@
> > >  406 auto quick dangerous
> > >  407 auto quick clone metadata
> > >  408 auto quick clone dedupe metadata
> > > +409 auto quick clone
> > If you decide to increase the concurrency/workload, probably the test
> > will not be quick enough to fit in quick group :)
> > 
> It is not too important if it would only be in clone and auto
> > Thanks,
> > Eryu
> > 
> > > 
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eryu Guan Feb. 28, 2017, 3:34 a.m. UTC | #5
[Didn't see my reply hit the list, resend]

On Mon, Feb 27, 2017 at 04:26:23PM +0200, nave vardy wrote:
> On Fri, 2017-02-24 at 17:47 +0800, Eryu Guan wrote:
> > On Thu, Feb 23, 2017 at 02:30:54PM +0200, Nave Vardy wrote:
> > > 
> > > reflink: concurrent operations test
> > > 
> > > perform read operation on the target file while
> > > doing write or fpunch operations on the reflinks.
> > I'm having the same question as Darrick's, is this test reproducing
> > some
> > kind of bugs for you?
> Maybe it would be better if I'll first explain my motivation for
> sending the patch. I am working at Plexistor a company that develops a
> new file-system for persistent memory. The file-system supports cow,
> and we wrote some fstests, we thought to contribute for the community.
> For the test itself, it helped us to found a bug in the development of
> cow, no other generic test currently exists did.

Thanks a lot for writing new tests!

> > 
> > > 
> > > 
> > > Signed-off-by: Nave Vardy <nave.vardy@plexistor.com>
> > > ---
> > >  tests/generic/409     | 97
> > > +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  tests/generic/409.out |  2 ++
> > >  tests/generic/group   |  1 +
> > >  3 files changed, 100 insertions(+)
> > >  create mode 100644 tests/generic/409
> > 0755 file mode for new test. This is done automatically if you use
> > "./new generic" to setup new test template.
> > 
> In the next version of the patch I'll use this.
> > > 
> > >  create mode 100644 tests/generic/409.out
> > > 
> > > diff --git a/tests/generic/409 b/tests/generic/409
> > > new file mode 100644
> > > index 0000000..eb1da13
> > > --- /dev/null
> > > +++ b/tests/generic/409
> > > @@ -0,0 +1,97 @@
> > > +#!/bin/bash
> > > +# FS QA Test No. 409
> > > +#
> > > +# test for races between write or fpunch operations on reflinked
> > > files
> > > +# to read operations on the targed file
> > > +#
> > > +#-----------------------------------------------------------------
> > > ------
> > > +#
> > > +# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
> > > +#
> > > +# This program is free software; you can redistribute it and/or
> > > +# modify it under the terms of the GNU General Public License as
> > > +# published by the Free Software Foundation.
> > > +#
> > > +# This program is distributed in the hope that it would be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +#
> > > +# You should have received a copy of the GNU General Public
> > > License
> > > +# along with this program; if not, write the Free Software
> > > Foundation,
> > > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > > +#
> > > +#-----------------------------------------------------------------
> > > ------
> > > +
> > > +seq=`basename $0`
> > > +seqres=$RESULT_DIR/$seq
> > > +echo "QA output created by $seq"
> > > +
> > > +here=`pwd`
> > > +tmp=/tmp/$$
> > > +status=0    # success is the default!
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +_cleanup()
> > > +{
> > > +	cd /
> > > +	rm -f $tmp.*
> > > +}
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common/rc
> > > +. ./common/reflink
> > > +
> > > +# remove previous $seqres.full before test
> > > +rm -f $seqres.full
> > > +
> > > +# real QA test starts here
> > > +_supported_fs generic
> > > +_supported_os Linux
> > > +_require_scratch_reflink
> > > +_require_cp_reflink
> > > +
> > > +_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 ||
> > > _fail "mkfs failed"
> > Same question on the filesystem size.
> > 
> I used _scratch_mkfs_sized to avoid enospace in this scenario. 
> I took a lager margin than I needed, which I can narrow down.
> The targted file is 10mb and I used 20 reflinks, because the tests
> writes the reflinks, in the end each will take place of max 10mb (it is
> not accurate because I use fpunch for every second block), so we have
> 21 files sum to max size of 210mb, so I think 250mb will be good
> enough.

Then I think, as Darrick suggested, _require_fs_space() is what you need.

> > > 
> > > +_scratch_mount || _fail "mount failed"
> > > +
> > > +echo "Silence is golden"
> > > +
> > > +workfile=${SCRATCH_MNT}/workfile
> > > +light_clone=${SCRATCH_MNT}/light_clone
> > > +
> > > +file_size=$((10 * 1024 * 1024))
> > > +bs=4096
> I used 4096 because this is our file-system copy-on-write granularity,
> this is in order to get maximum effect. I'll be able to use
> _get_file_block_size().
> > > +block_num=$((file_size / bs))
> > > +reflinks_num=20
> > > +
> > > +$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
> > > +
> > > +#create all reflinkfs
> > > +for ((i=1; i<reflinks_num; i++));
> > > +do
> > > +	cp --reflink $workfile ${light_clone}_$i
> I'll change this to use _cp_reflink()
> > > +done
> > > +
> > > +#for each block simultaneously write to all reflinks
> > > +for ((block_index=0; block_index<block_num; block_index++));
> > > +do
> > > +	for ((i=0; i<reflinks_num; i++));
> > > +	do
> > > +		if [ $i -eq 0 ]; then
> > > +			$XFS_IO_PROG -c "pread $((block_index *
> > > bs)) $bs" \
> > > +						$workfile >>
> > > $seqres.full &
> > > +		elif [ $((block_index % 2)) -eq 0 ]; then
> > > +			$XFS_IO_PROG -c "pwrite $((block_index *
> > > bs)) $bs" \
> > > +					${light_clone}_$i  >>
> > > $seqres.full &
> > > +		else
> > > +			$XFS_IO_PROG -c "fpunch $((block_index *
> > > bs)) $bs" \
> > > +					${light_clone}_$i >>
> > > $seqres.full &
> > > +		fi
> > I don't see much concurrency in this loop, for each block_index
> > there's
> > only one pread process and reflinks_num processes doing pwrite or
> > fpunch. Seems only pwrites are racing with pwrites and fpunchs are
> > racing with fpunchs. How about:
> > 
> > for ((block_index=0; block_index<block_num; block_index++)); do
> > 	for ((i=0; i<reflinks_num; i++)); do
> > 		$XFS_IO_PROG -c "pread ..." ... &
> > 		if [ $((i % 2)) -eq 0 ]; then
> > 			$XFS_IO_PROG -c "pwrite ..." ... &
> > 		else
> > 			$XFS_IO_PROG -c "fpunch ..." ... &
> > 		fi
> > 	done
> > 	wait
> > done
> > 
> > So for every pwrite or fpunch there's a pread racing with it. But
> > because I don't know if you're reproducing some specific bug with
> > this
> > loop, I'm not sure if my version still works.
> > 
> I understand your comment and your suggestion, we interntionally didn't
> won't to modify the workfile so we just read it and race the pread with
> the pwrites of the reflinks (or the fpunchs). we wanted that all
> reflinks simultaneously will perform pwrite and in the next iteration
> fpunch (and not half reflinks pwrite and the other half fpunch - which
> is fine but describes another scenario). I think we can combine your
> suggestion with my version, to get more concurrency like this:

Looks reasonable, please add some comments to describe this workload.

> 
> for ((block_index=0; block_index<block_num; block_index++)); do
>      for ((i=0; i<reflinks_num; i++)); do

Perhaps we can start with i=1 then remove the "$i -ne 0" check?

Thanks,
Eryu

>           $XFS_IO_PROG -c "pread $((block_index * 	bs)) $bs" \
> 					$workfile >> $seqres.full &
>           if [ $i -ne 0 ]; then
>               if [ $((block_index % 2)) -eq 0 ]; then
> 		  $XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \        
>                             ${light_clone}_$i >> $seqres.full &
>               else
>                   $XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
>                                     ${light_clone}_$i >> $seqres.full &
>               fi
>            fi
>        wait
>        done
> done		 
> > > +	wait
> > > +done
> > > +
> > > +# success, all done
> > > +status=0
> > > +exit
> > > diff --git a/tests/generic/409.out b/tests/generic/409.out
> > > new file mode 100644
> > > index 0000000..6f11537
> > > --- /dev/null
> > > +++ b/tests/generic/409.out
> > > @@ -0,0 +1,2 @@
> > > +QA output created by 409
> > > +Silence is golden
> > > diff --git a/tests/generic/group b/tests/generic/group
> > > index d14f221..27ff229 100644
> > > --- a/tests/generic/group
> > > +++ b/tests/generic/group
> > > @@ -411,3 +411,4 @@
> > >  406 auto quick dangerous
> > >  407 auto quick clone metadata
> > >  408 auto quick clone dedupe metadata
> > > +409 auto quick clone
> > If you decide to increase the concurrency/workload, probably the test
> > will not be quick enough to fit in quick group :)
> > 
> It is not too important if it would only be in clone and auto
> > Thanks,
> > Eryu
> > 
> > > 
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/tests/generic/409 b/tests/generic/409
new file mode 100644
index 0000000..eb1da13
--- /dev/null
+++ b/tests/generic/409
@@ -0,0 +1,97 @@ 
+#!/bin/bash
+# FS QA Test No. 409
+#
+# test for races between write or fpunch operations on reflinked files
+# to read operations on the targed file
+#
+#-----------------------------------------------------------------------
+#
+# Copyright (c) 2017 Plexistor Ltd. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#-----------------------------------------------------------------------
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=0    # success is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/reflink
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs generic
+_supported_os Linux
+_require_scratch_reflink
+_require_cp_reflink
+
+_scratch_mkfs_sized $((400 * 1024 ** 2)) >> $seqres.full 2>&1 || _fail "mkfs failed"
+_scratch_mount || _fail "mount failed"
+
+echo "Silence is golden"
+
+workfile=${SCRATCH_MNT}/workfile
+light_clone=${SCRATCH_MNT}/light_clone
+
+file_size=$((10 * 1024 * 1024))
+bs=4096
+block_num=$((file_size / bs))
+reflinks_num=20
+
+$XFS_IO_PROG -f -c "pwrite 0 $file_size" $workfile >> $seqres.full
+
+#create all reflinkfs
+for ((i=1; i<reflinks_num; i++));
+do
+	cp --reflink $workfile ${light_clone}_$i
+done
+
+#for each block simultaneously write to all reflinks
+for ((block_index=0; block_index<block_num; block_index++));
+do
+	for ((i=0; i<reflinks_num; i++));
+	do
+		if [ $i -eq 0 ]; then
+			$XFS_IO_PROG -c "pread $((block_index * bs)) $bs" \
+						$workfile >> $seqres.full &
+		elif [ $((block_index % 2)) -eq 0 ]; then
+			$XFS_IO_PROG -c "pwrite $((block_index * bs)) $bs" \
+					${light_clone}_$i  >> $seqres.full &
+		else
+			$XFS_IO_PROG -c "fpunch $((block_index * bs)) $bs" \
+					${light_clone}_$i >> $seqres.full &
+		fi
+	done
+	# wait for all threads to join before moving to next index
+	wait
+done
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/409.out b/tests/generic/409.out
new file mode 100644
index 0000000..6f11537
--- /dev/null
+++ b/tests/generic/409.out
@@ -0,0 +1,2 @@ 
+QA output created by 409
+Silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index d14f221..27ff229 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -411,3 +411,4 @@ 
 406 auto quick dangerous
 407 auto quick clone metadata
 408 auto quick clone dedupe metadata
+409 auto quick clone