Message ID | 20160616150146.GA14015@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jun 16, 2016 at 08:01:47AM -0700, Christoph Hellwig wrote: > I've run into an odd bug when testing the clone / reflink functionality, > and managed to isolate a testcase not relying on clones. The issue is > that after a lot of direct I/O hitting ENOSPC on an NFS mount I run > into a silly rename that never gets cleaned up after cleaning up the > directory. I have a redhat bug for what sounds like the same behavior. I spent some time looking at it but I don't think I got anywhere. https://bugzilla.redhat.com/show_bug.cgi?id=1132610 (But you probably can't see that right now; I'll see if I can open it.) --b. > > Note that so far I've only managed to reproduce it against a Linux > server running XFS (ext4 and btrfs seem fine) despite looking like > a client bug, and although once in a while a single run of the testcase > causes the bug I usually need a few iterations. > > All over it seems like some odd dentry refcount race where we see > a spurious reference for one reason or another. > > > Below is a patch to xfstests to add this reproducer: > > > diff --git a/tests/generic/700 b/tests/generic/700 > new file mode 100755 > index 0000000..33505b0 > --- /dev/null > +++ b/tests/generic/700 > @@ -0,0 +1,130 @@ > +#! /bin/bash > +# FS QA Test No. 700 > +# > +# Dumbed down Copy of the original version of generic/187 that reproduced > +# NFS silly rename issues. > +#----------------------------------------------------------------------- > +# Copyright (c) 2015, Oracle and/or its affiliates. All Rights Reserved. > +# > +# This program is free software; you can redistribute it and/or > +# modify it under the terms of the GNU General Public License as > +# published by the Free Software Foundation. > +# > +# This program is distributed in the hope that it would be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. 
> +# > +# You should have received a copy of the GNU General Public License > +# along with this program; if not, write the Free Software Foundation, > +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > +#----------------------------------------------------------------------- > + > +seq=`basename $0` > +seqres=$RESULT_DIR/$seq > +echo "QA output created by $seq" > + > +here=`pwd` > +tmp=/tmp/$$ > +status=1 # failure is the default! > +trap "_cleanup; exit \$status" 0 1 2 3 15 > + > +_cleanup() > +{ > + cd / > + rm -rf $tmp.* $testdir > +} > + > +# get standard environment, filters and checks > +. ./common/rc > +. ./common/filter > + > +# real QA test starts here > +_supported_os Linux > +_require_xfs_io_command "falloc" > +_require_xfs_io_command "fpunch" > +test $FSTYP = "btrfs" && _notrun "Can't fragment free space on btrfs." > +_require_odirect > + > +rm -f $seqres.full > + > +_fragment_freesp() > +{ > + file=$1 > + > + # consume nearly all available space (leave ~1MB) > + avail=`_get_available_space $SCRATCH_MNT` > + echo "$avail bytes left" > + filesize=$((avail - 1048576)) > + $XFS_IO_PROG -fc "truncate $filesize" $file > + > + chunks=20 > + chunksizemb=$((filesize / chunks / 1048576)) > + seq 1 $chunks | while read f; do > + echo "$((f * chunksizemb)) file size $f / 20" > + $XFS_IO_PROG -fc "falloc -k $(( (f - 1) * chunksizemb))m ${chunksizemb}m" $file > + done > + > + chunks=100 > + chunksizemb=$((filesize / chunks / 1048576)) > + seq 80 $chunks | while read f; do > + echo "$((f * chunksizemb)) file size $f / $chunks" > + $XFS_IO_PROG -fc "falloc -k $(( (f - 1) * chunksizemb))m ${chunksizemb}m" $file > + done > + > + filesizemb=$((filesize / 1048576)) > + $XFS_IO_PROG -fc "falloc -k 0 ${filesizemb}m" $file > + > + # Try again anyway > + avail=`_get_available_space $SCRATCH_MNT` > + $XFS_IO_PROG -fc "pwrite -S 0x65 0 $avail" ${file}.${i} > + > + # Punch out whatever we need > + seq 1 $((nr * 4)) | while read f; do > + $XFS_IO_PROG -f -c 
"fpunch $((f * 2 * blksz)) $blksz" $file > + done > +} > + > +echo "Format and mount" > +_scratch_mkfs > $seqres.full 2>&1 > +_scratch_mount >> $seqres.full 2>&1 > + > +testdir=$SCRATCH_MNT/test-$seq > +mkdir $testdir > + > +echo "Create the original files" > +blksz=65536 > +nr=1024 > +filesize=$((blksz * nr)) > +_pwrite_byte 0x61 0 $filesize $testdir/file1 >> $seqres.full > +_pwrite_byte 0x62 0 $filesize $testdir/file2 >> $seqres.full > +seq 0 2 $((nr-1)) | while read f; do > +# _reflink_range $testdir/file1 $((blksz * f)) $testdir/file3 $((blksz * f)) $blksz >> $seqres.full > + touch $testdir/file3 > + _pwrite_byte 0x61 $((blksz * f)) $blksz $testdir/file3.chk >> $seqres.full > +done > +seq 1 2 $((nr-1)) | while read f; do > + touch $testdir/file3 > +# _reflink_range $testdir/file2 $((blksz * f)) $testdir/file3 $((blksz * f)) $blksz >> $seqres.full > + _pwrite_byte 0x62 $((blksz * f)) $blksz $testdir/file3.chk >> $seqres.full > +done > +_scratch_cycle_mount > +_fragment_freesp $testdir/bigfile >> $seqres.full 2>&1 > +filesize=$((blksz * nr)) > +_scratch_cycle_mount > + > +echo "Overwrite" > +cowoff=$((filesize / 4)) > +cowsz=$((filesize / 2)) > +$XFS_IO_PROG -d -f -c "pwrite -S 0x63 -b $cowsz $cowoff $cowsz" $testdir/file3 >> $seqres.full > +_pwrite_byte 0x63 $cowoff $cowsz $testdir/file3.chk >> $seqres.full > + > +# mount / unmount > +_scratch_cycle_mount > + > +echo "Deleting" > +rm -r $testdir > + > +# success, all done > +status=0 > +exit > diff --git a/tests/generic/700.out b/tests/generic/700.out > new file mode 100644 > index 0000000..9f7ce0f > --- /dev/null > +++ b/tests/generic/700.out > @@ -0,0 +1,5 @@ > +QA output created by 700 > +Format and mount > +Create the original files > +Overwrite > +Deleting > diff --git a/tests/generic/group b/tests/generic/group > index 2bd40a1..46a796a 100644 > --- a/tests/generic/group > +++ b/tests/generic/group > @@ -354,6 +354,7 @@ > 349 blockdev quick rw > 350 blockdev quick rw > 351 blockdev quick rw > +700 auto nfs 
> 923 auto quick clone > 924 auto quick clone > 925 auto quick clone > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 16, 2016 at 01:29:52PM -0400, J. Bruce Fields wrote: > On Thu, Jun 16, 2016 at 08:01:47AM -0700, Christoph Hellwig wrote: > > I've run into an odd bug when testing the clone / reflink functionality, > > and managed to isolate a testcase not relying on clones. The issue is > > that after a lot of direct I/O hitting ENOSPC on an NFS mount I run > > into a silly rename that never gets cleaned up after cleaning up the > > directory. > > I have a redhat bug for what sounds like the same behavior. I spent > some time looking at it but I don't think I got anywhere. > > https://bugzilla.redhat.com/show_bug.cgi?id=1132610 > > (But you probably can't see that right now; I'll see if I can open it.) I can't see it indeed. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jun 17, 2016 at 04:31:10AM -0700, Christoph Hellwig wrote: > On Thu, Jun 16, 2016 at 01:29:52PM -0400, J. Bruce Fields wrote: > > On Thu, Jun 16, 2016 at 08:01:47AM -0700, Christoph Hellwig wrote: > > > I've run into an odd bug when testing the clone / reflink functionality, > > > and managed to isolate a testcase not relying on clones. The issue is > > > that after a lot of direct I/O hitting ENOSPC on an NFS mount I run > > > into a silly rename that never gets cleaned up after cleaning up the > > > directory. > > > > I have a redhat bug for what sounds like the same behavior. I spent > > some time looking at it but I don't think I got anywhere. > > > > https://bugzilla.redhat.com/show_bug.cgi?id=1132610 > > > > (But you probably can't see that right now; I'll see if I can open it.) > > I can't see it indeed. Fixed. Looking at it, I actually don't think there's much of interest besides reproducers, which you already have. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/tests/generic/700 b/tests/generic/700 new file mode 100755 index 0000000..33505b0 --- /dev/null +++ b/tests/generic/700 @@ -0,0 +1,130 @@ +#! /bin/bash +# FS QA Test No. 700 +# +# Dumbed down Copy of the original version of generic/187 that reproduced +# NFS silly rename issues. +#----------------------------------------------------------------------- +# Copyright (c) 2015, Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#----------------------------------------------------------------------- + +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! +trap "_cleanup; exit \$status" 0 1 2 3 15 + +_cleanup() +{ + cd / + rm -rf $tmp.* $testdir +} + +# get standard environment, filters and checks +. ./common/rc +. ./common/filter + +# real QA test starts here +_supported_os Linux +_require_xfs_io_command "falloc" +_require_xfs_io_command "fpunch" +test $FSTYP = "btrfs" && _notrun "Can't fragment free space on btrfs." 
+_require_odirect + +rm -f $seqres.full + +_fragment_freesp() +{ + file=$1 + + # consume nearly all available space (leave ~1MB) + avail=`_get_available_space $SCRATCH_MNT` + echo "$avail bytes left" + filesize=$((avail - 1048576)) + $XFS_IO_PROG -fc "truncate $filesize" $file + + chunks=20 + chunksizemb=$((filesize / chunks / 1048576)) + seq 1 $chunks | while read f; do + echo "$((f * chunksizemb)) file size $f / 20" + $XFS_IO_PROG -fc "falloc -k $(( (f - 1) * chunksizemb))m ${chunksizemb}m" $file + done + + chunks=100 + chunksizemb=$((filesize / chunks / 1048576)) + seq 80 $chunks | while read f; do + echo "$((f * chunksizemb)) file size $f / $chunks" + $XFS_IO_PROG -fc "falloc -k $(( (f - 1) * chunksizemb))m ${chunksizemb}m" $file + done + + filesizemb=$((filesize / 1048576)) + $XFS_IO_PROG -fc "falloc -k 0 ${filesizemb}m" $file + + # Try again anyway + avail=`_get_available_space $SCRATCH_MNT` + $XFS_IO_PROG -fc "pwrite -S 0x65 0 $avail" ${file}.${i} + + # Punch out whatever we need + seq 1 $((nr * 4)) | while read f; do + $XFS_IO_PROG -f -c "fpunch $((f * 2 * blksz)) $blksz" $file + done +} + +echo "Format and mount" +_scratch_mkfs > $seqres.full 2>&1 +_scratch_mount >> $seqres.full 2>&1 + +testdir=$SCRATCH_MNT/test-$seq +mkdir $testdir + +echo "Create the original files" +blksz=65536 +nr=1024 +filesize=$((blksz * nr)) +_pwrite_byte 0x61 0 $filesize $testdir/file1 >> $seqres.full +_pwrite_byte 0x62 0 $filesize $testdir/file2 >> $seqres.full +seq 0 2 $((nr-1)) | while read f; do +# _reflink_range $testdir/file1 $((blksz * f)) $testdir/file3 $((blksz * f)) $blksz >> $seqres.full + touch $testdir/file3 + _pwrite_byte 0x61 $((blksz * f)) $blksz $testdir/file3.chk >> $seqres.full +done +seq 1 2 $((nr-1)) | while read f; do + touch $testdir/file3 +# _reflink_range $testdir/file2 $((blksz * f)) $testdir/file3 $((blksz * f)) $blksz >> $seqres.full + _pwrite_byte 0x62 $((blksz * f)) $blksz $testdir/file3.chk >> $seqres.full +done +_scratch_cycle_mount +_fragment_freesp 
$testdir/bigfile >> $seqres.full 2>&1 +filesize=$((blksz * nr)) +_scratch_cycle_mount + +echo "Overwrite" +cowoff=$((filesize / 4)) +cowsz=$((filesize / 2)) +$XFS_IO_PROG -d -f -c "pwrite -S 0x63 -b $cowsz $cowoff $cowsz" $testdir/file3 >> $seqres.full +_pwrite_byte 0x63 $cowoff $cowsz $testdir/file3.chk >> $seqres.full + +# mount / unmount +_scratch_cycle_mount + +echo "Deleting" +rm -r $testdir + +# success, all done +status=0 +exit diff --git a/tests/generic/700.out b/tests/generic/700.out new file mode 100644 index 0000000..9f7ce0f --- /dev/null +++ b/tests/generic/700.out @@ -0,0 +1,5 @@ +QA output created by 700 +Format and mount +Create the original files +Overwrite +Deleting diff --git a/tests/generic/group b/tests/generic/group index 2bd40a1..46a796a 100644 --- a/tests/generic/group +++ b/tests/generic/group @@ -354,6 +354,7 @@ 349 blockdev quick rw 350 blockdev quick rw 351 blockdev quick rw +700 auto nfs 923 auto quick clone 924 auto quick clone 925 auto quick clone