Message ID | 164193783590.3008286.3623476203965250828.stgit@magnolia (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fstests: random fixes | expand |
On Tue, Jan 11, 2022 at 01:50:35PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > Add a regression test to check that XFS_IOC_ALLOCSP isn't handing out > stale disk blocks for preallocation. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > --- > .gitignore | 1 > src/Makefile | 2 - > src/allocstale.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > tests/xfs/832 | 56 +++++++++++++++++++++++++ > tests/xfs/832.out | 2 + > 5 files changed, 177 insertions(+), 1 deletion(-) > create mode 100644 src/allocstale.c > create mode 100755 tests/xfs/832 > create mode 100644 tests/xfs/832.out > > > diff --git a/.gitignore b/.gitignore > index 65b93307..ba0c572b 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -56,6 +56,7 @@ tags > # src/ binaries > /src/af_unix > /src/alloc > +/src/allocstale > /src/append_reader > /src/append_writer > /src/attr_replace_test > diff --git a/src/Makefile b/src/Makefile > index 1737ed0e..111ce1d9 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault > + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > diff --git a/src/allocstale.c b/src/allocstale.c > new file mode 100644 > index 00000000..6253fe4c > --- /dev/null > +++ b/src/allocstale.c > @@ -0,0 +1,117 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2022 Oracle. All Rights Reserved. > + * Author: Darrick J. Wong <djwong@kernel.org> > + * > + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks > + * into a file. > + */ > +#include <xfs/xfs.h> > +#include <stdlib.h> > +#include <stdio.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <unistd.h> > +#include <string.h> > + > +#ifndef XFS_IOC_ALLOCSP > +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) > +#endif > + > +int > +main( > + int argc, > + char *argv[]) > +{ > + struct stat sb; > + char *buf, *zeroes; > + unsigned long i; > + unsigned long iterations; > + int fd, ret; > + > + if (argc != 3) { > + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); > + return 1; > + } > + > + errno = 0; > + iterations = strtoul(argv[2], NULL, 0); > + if (errno) { > + perror(argv[2]); > + return 1; > + } > + > + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); > + if (fd < 0) { > + perror(argv[1]); > + return 1; > + } > + > + ret = fstat(fd, &sb); > + if (ret) { > + perror(argv[1]); > + return 1; > + } > + > + buf = malloc(sb.st_blksize); > + if (!buf) { > + perror("pread buffer"); > + return 1; > + } > + > + zeroes = calloc(1, sb.st_blksize); > + if (!zeroes) { > + perror("zeroes buffer"); > + return 1; > + } > + > + for (i = 1; i <= iterations; i++) { > + struct xfs_flock64 arg = { }; > + ssize_t read_bytes; > + off_t offset = sb.st_blksize * i; > + > + /* Ensure the last block of the file is a hole... */ > + ret = ftruncate(fd, offset - 1); > + if (ret) { > + perror("truncate"); > + return 1; > + } > + > + /* > + * ...then use ALLOCSP to allocate the last block in the file. > + * An unpatched kernel neglects to mark the new mapping > + * unwritten or to zero the ondisk block, so... > + */ > + arg.l_whence = SEEK_SET; > + arg.l_start = offset; > + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); > + if (ret < 0) { > + perror("ioctl"); > + return 1; > + } > + > + /* ... we can read old disk contents here. */ > + read_bytes = pread(fd, buf, sb.st_blksize, > + offset - sb.st_blksize); > + if (read_bytes < 0) { > + perror(argv[1]); > + return 1; > + } > + if (read_bytes != sb.st_blksize) { > + fprintf(stderr, "%s: short read of %zd bytes\n", > + argv[1], read_bytes); > + return 1; > + } > + > + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { > + fprintf(stderr, "%s: found junk near offset %zd!\n", > + argv[1], offset - sb.st_blksize); > + return 2; > + } > + } > + > + return 0; > +} > diff --git a/tests/xfs/832 b/tests/xfs/832 > new file mode 100755 > index 00000000..3820ff8c > --- /dev/null > +++ b/tests/xfs/832 > @@ -0,0 +1,56 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2022 Oracle. All Rights Reserved. > +# > +# FS QA Test 832 > +# > +# Regression test for commit: > +# > +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") > +# > +. ./common/preamble > +_begin_fstest auto quick prealloc > + > +# Import common functions. > +. ./common/filter > + > +# real QA test starts here > + > +# Modify as appropriate. > +_supported_fs xfs > +_require_test > +_require_scratch Added the following require rule on commit. _require_test_program "allocstale" Thanks, Eryu > + > +size_mb=32 > +# Write a known pattern to the disk so that we can detect stale disk blocks > +# being mapped into the file. In the test author's experience, the bug will > +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up > +# to 16MB should suffice. > +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full > +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full > + > +_scratch_mount > + > +# Force the file to be created on the data device, which we pre-initialized > +# with a known pattern. The bug exists in the generic bmap code, so the choice > +# of backing device does not matter, and ignoring the rt device gets us out of > +# needing to detect things like rt extent size. > +_xfs_force_bdev data $SCRATCH_MNT > +testfile=$SCRATCH_MNT/a > + > +# Allow the test program to expand the file to consume half the free space. > +blksz=$(_get_file_block_size $SCRATCH_MNT) > +iterations=$(( (size_mb / 2) * 1048576 / blksz)) > +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full > + > +# Run reproducer program and dump file contents if we see stale data. Full > +# details are in the source for the C program, but in a nutshell we run ALLOCSP > +# one block at a time to see if it'll give us blocks full of 'X'es. > +$here/src/allocstale $testfile $iterations > +res=$? > +test $res -eq 2 && od -tx1 -Ad -c $testfile > + > +# success, all done > +echo Silence is golden > +status=0 > +exit > diff --git a/tests/xfs/832.out b/tests/xfs/832.out > new file mode 100644 > index 00000000..bb8a6c12 > --- /dev/null > +++ b/tests/xfs/832.out > @@ -0,0 +1,2 @@ > +QA output created by 832 > +Silence is golden
On Tue, Jan 11, 2022 at 01:50:35PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > Add a regression test to check that XFS_IOC_ALLOCSP isn't handing out > stale disk blocks for preallocation. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > --- Hi Darrick, This case is easily failed on some multi-striped storage[1]. The full output as [2], the out.bad output as [3]. Thanks, Zorro [1] FSTYP -- xfs (non-debug) PLATFORM -- Linux/x86_64 xxx-xxxxx-xx 5.14.0-xxx #1 SMP PREEMPT Fri Jan 14 09:24:44 UTC 2022 MKFS_OPTIONS -- -f -m crc=1,finobt=1,rmapbt=0,reflink=1,bigtime=1,inobtcount=1 -i sparse=1 /dev/sda4 MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/xfstests/scratch xfs/832 _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) (see /var/lib/xfstests/results//xfs/832.full for details) - output mismatch (see /var/lib/xfstests/results//xfs/832.out.bad) --- tests/xfs/832.out 2022-01-15 22:18:35.175245791 -0500 +++ /var/lib/xfstests/results//xfs/832.out.bad 2022-01-15 22:20:46.630697627 -0500 @@ -1,2 +1,3 @@ QA output created by 832 +ioctl: No space left on device Silence is golden ... (Run 'diff -u /var/lib/xfstests/tests/xfs/832.out /var/lib/xfstests/results//xfs/832.out.bad' to see the entire diff) Ran: xfs/832 Failures: xfs/832 Failed 1 of 1 tests [2] wrote 33554432/33554432 bytes at offset 0 32 MiB, 4 ops; 0.1368 sec (233.848 MiB/sec and 29.2310 ops/sec) meta-data=/dev/sda4 isize=512 agcount=1, agsize=4112 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=0 = reflink=1 bigtime=1 inobtcount=1 data = bsize=4096 blocks=4112, imaxpct=25 = sunit=16 swidth=32 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=1872, version=2 = sectsz=512 sunit=16 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 Setting up 4096 runs for block size 4096 _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) *** xfs_repair -n output *** Phase 1 - find and verify superblock... Only one AG detected - cannot validate filesystem geometry. Use the -o force_geometry option to proceed. *** end xfs_repair output ... [3] QA output created by 832 ioctl: No space left on device Silence is golden > .gitignore | 1 > src/Makefile | 2 - > src/allocstale.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > tests/xfs/832 | 56 +++++++++++++++++++++++++ > tests/xfs/832.out | 2 + > 5 files changed, 177 insertions(+), 1 deletion(-) > create mode 100644 src/allocstale.c > create mode 100755 tests/xfs/832 > create mode 100644 tests/xfs/832.out > > > diff --git a/.gitignore b/.gitignore > index 65b93307..ba0c572b 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -56,6 +56,7 @@ tags > # src/ binaries > /src/af_unix > /src/alloc > +/src/allocstale > /src/append_reader > /src/append_writer > /src/attr_replace_test > diff --git a/src/Makefile b/src/Makefile > index 1737ed0e..111ce1d9 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault > + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > diff --git a/src/allocstale.c b/src/allocstale.c > new file mode 100644 > index 00000000..6253fe4c > --- /dev/null > +++ b/src/allocstale.c > @@ -0,0 +1,117 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2022 Oracle. All Rights Reserved. > + * Author: Darrick J. Wong <djwong@kernel.org> > + * > + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks > + * into a file. > + */ > +#include <xfs/xfs.h> > +#include <stdlib.h> > +#include <stdio.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <unistd.h> > +#include <string.h> > + > +#ifndef XFS_IOC_ALLOCSP > +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) > +#endif > + > +int > +main( > + int argc, > + char *argv[]) > +{ > + struct stat sb; > + char *buf, *zeroes; > + unsigned long i; > + unsigned long iterations; > + int fd, ret; > + > + if (argc != 3) { > + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); > + return 1; > + } > + > + errno = 0; > + iterations = strtoul(argv[2], NULL, 0); > + if (errno) { > + perror(argv[2]); > + return 1; > + } > + > + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); > + if (fd < 0) { > + perror(argv[1]); > + return 1; > + } > + > + ret = fstat(fd, &sb); > + if (ret) { > + perror(argv[1]); > + return 1; > + } > + > + buf = malloc(sb.st_blksize); > + if (!buf) { > + perror("pread buffer"); > + return 1; > + } > + > + zeroes = calloc(1, sb.st_blksize); > + if (!zeroes) { > + perror("zeroes buffer"); > + return 1; > + } > + > + for (i = 1; i <= iterations; i++) { > + struct xfs_flock64 arg = { }; > + ssize_t read_bytes; > + off_t offset = sb.st_blksize * i; > + > + /* Ensure the last block of the file is a hole... */ > + ret = ftruncate(fd, offset - 1); > + if (ret) { > + perror("truncate"); > + return 1; > + } > + > + /* > + * ...then use ALLOCSP to allocate the last block in the file. > + * An unpatched kernel neglects to mark the new mapping > + * unwritten or to zero the ondisk block, so... > + */ > + arg.l_whence = SEEK_SET; > + arg.l_start = offset; > + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); > + if (ret < 0) { > + perror("ioctl"); > + return 1; > + } > + > + /* ... we can read old disk contents here. */ > + read_bytes = pread(fd, buf, sb.st_blksize, > + offset - sb.st_blksize); > + if (read_bytes < 0) { > + perror(argv[1]); > + return 1; > + } > + if (read_bytes != sb.st_blksize) { > + fprintf(stderr, "%s: short read of %zd bytes\n", > + argv[1], read_bytes); > + return 1; > + } > + > + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { > + fprintf(stderr, "%s: found junk near offset %zd!\n", > + argv[1], offset - sb.st_blksize); > + return 2; > + } > + } > + > + return 0; > +} > diff --git a/tests/xfs/832 b/tests/xfs/832 > new file mode 100755 > index 00000000..3820ff8c > --- /dev/null > +++ b/tests/xfs/832 > @@ -0,0 +1,56 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2022 Oracle. All Rights Reserved. > +# > +# FS QA Test 832 > +# > +# Regression test for commit: > +# > +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") > +# > +. ./common/preamble > +_begin_fstest auto quick prealloc > + > +# Import common functions. > +. ./common/filter > + > +# real QA test starts here > + > +# Modify as appropriate. > +_supported_fs xfs > +_require_test > +_require_scratch > + > +size_mb=32 > +# Write a known pattern to the disk so that we can detect stale disk blocks > +# being mapped into the file. In the test author's experience, the bug will > +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up > +# to 16MB should suffice. > +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full > +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full > + > +_scratch_mount > + > +# Force the file to be created on the data device, which we pre-initialized > +# with a known pattern. The bug exists in the generic bmap code, so the choice > +# of backing device does not matter, and ignoring the rt device gets us out of > +# needing to detect things like rt extent size. > +_xfs_force_bdev data $SCRATCH_MNT > +testfile=$SCRATCH_MNT/a > + > +# Allow the test program to expand the file to consume half the free space. > +blksz=$(_get_file_block_size $SCRATCH_MNT) > +iterations=$(( (size_mb / 2) * 1048576 / blksz)) > +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full > + > +# Run reproducer program and dump file contents if we see stale data. Full > +# details are in the source for the C program, but in a nutshell we run ALLOCSP > +# one block at a time to see if it'll give us blocks full of 'X'es. > +$here/src/allocstale $testfile $iterations > +res=$? > +test $res -eq 2 && od -tx1 -Ad -c $testfile > + > +# success, all done > +echo Silence is golden > +status=0 > +exit > diff --git a/tests/xfs/832.out b/tests/xfs/832.out > new file mode 100644 > index 00000000..bb8a6c12 > --- /dev/null > +++ b/tests/xfs/832.out > @@ -0,0 +1,2 @@ > +QA output created by 832 > +Silence is golden >
On Tue, Jan 18, 2022 at 11:37:35AM +0800, Zorro Lang wrote: > On Tue, Jan 11, 2022 at 01:50:35PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <djwong@kernel.org> > > > > Add a regression test to check that XFS_IOC_ALLOCSP isn't handing out > > stale disk blocks for preallocation. > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > --- > > Hi Darrick, > > This case is easily failed on some multi-striped storage[1]. The full output > as [2], the out.bad output as [3]. > > Thanks, > Zorro > > [1] > FSTYP -- xfs (non-debug) > PLATFORM -- Linux/x86_64 xxx-xxxxx-xx 5.14.0-xxx #1 SMP PREEMPT Fri Jan 14 09:24:44 UTC 2022 > MKFS_OPTIONS -- -f -m crc=1,finobt=1,rmapbt=0,reflink=1,bigtime=1,inobtcount=1 -i sparse=1 /dev/sda4 > MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/xfstests/scratch > > xfs/832 _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > (see /var/lib/xfstests/results//xfs/832.full for details) > - output mismatch (see /var/lib/xfstests/results//xfs/832.out.bad) > --- tests/xfs/832.out 2022-01-15 22:18:35.175245791 -0500 > +++ /var/lib/xfstests/results//xfs/832.out.bad 2022-01-15 22:20:46.630697627 -0500 > @@ -1,2 +1,3 @@ > QA output created by 832 > +ioctl: No space left on device > Silence is golden > ... > (Run 'diff -u /var/lib/xfstests/tests/xfs/832.out /var/lib/xfstests/results//xfs/832.out.bad' to see the entire diff) > Ran: xfs/832 > Failures: xfs/832 > Failed 1 of 1 tests > > [2] > wrote 33554432/33554432 bytes at offset 0 > 32 MiB, 4 ops; 0.1368 sec (233.848 MiB/sec and 29.2310 ops/sec) > meta-data=/dev/sda4 isize=512 agcount=1, agsize=4112 blks Single AG filesystems aren't a supported configuration. Is sda4 actually 16MB? I'm a little surprised that this was the outcome of "_scratch_mkfs_sized 33554432" on line 32. --D > = sectsz=512 attr=2, projid32bit=1 > = crc=1 finobt=1, sparse=1, rmapbt=0 > = reflink=1 bigtime=1 inobtcount=1 > data = bsize=4096 blocks=4112, imaxpct=25 > = sunit=16 swidth=32 blks > naming =version 2 bsize=4096 ascii-ci=0, ftype=1 > log =internal log bsize=4096 blocks=1872, version=2 > = sectsz=512 sunit=16 blks, lazy-count=1 > realtime =none extsz=4096 blocks=0, rtextents=0 > Setting up 4096 runs for block size 4096 > _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > *** xfs_repair -n output *** > Phase 1 - find and verify superblock... > Only one AG detected - cannot validate filesystem geometry. > Use the -o force_geometry option to proceed. > *** end xfs_repair output > ... > > [3] > QA output created by 832 > ioctl: No space left on device > Silence is golden > > > > .gitignore | 1 > > src/Makefile | 2 - > > src/allocstale.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > > tests/xfs/832 | 56 +++++++++++++++++++++++++ > > tests/xfs/832.out | 2 + > > 5 files changed, 177 insertions(+), 1 deletion(-) > > create mode 100644 src/allocstale.c > > create mode 100755 tests/xfs/832 > > create mode 100644 tests/xfs/832.out > > > > > > diff --git a/.gitignore b/.gitignore > > index 65b93307..ba0c572b 100644 > > --- a/.gitignore > > +++ b/.gitignore > > @@ -56,6 +56,7 @@ tags > > # src/ binaries > > /src/af_unix > > /src/alloc > > +/src/allocstale > > /src/append_reader > > /src/append_writer > > /src/attr_replace_test > > diff --git a/src/Makefile b/src/Makefile > > index 1737ed0e..111ce1d9 100644 > > --- a/src/Makefile > > +++ b/src/Makefile > > @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ > > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault > > + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale > > > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > diff --git a/src/allocstale.c b/src/allocstale.c > > new file mode 100644 > > index 00000000..6253fe4c > > --- /dev/null > > +++ b/src/allocstale.c > > @@ -0,0 +1,117 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (C) 2022 Oracle. All Rights Reserved. > > + * Author: Darrick J. Wong <djwong@kernel.org> > > + * > > + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks > > + * into a file. > > + */ > > +#include <xfs/xfs.h> > > +#include <stdlib.h> > > +#include <stdio.h> > > +#include <unistd.h> > > +#include <sys/types.h> > > +#include <sys/stat.h> > > +#include <fcntl.h> > > +#include <errno.h> > > +#include <unistd.h> > > +#include <string.h> > > + > > +#ifndef XFS_IOC_ALLOCSP > > +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) > > +#endif > > + > > +int > > +main( > > + int argc, > > + char *argv[]) > > +{ > > + struct stat sb; > > + char *buf, *zeroes; > > + unsigned long i; > > + unsigned long iterations; > > + int fd, ret; > > + > > + if (argc != 3) { > > + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); > > + return 1; > > + } > > + > > + errno = 0; > > + iterations = strtoul(argv[2], NULL, 0); > > + if (errno) { > > + perror(argv[2]); > > + return 1; > > + } > > + > > + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); > > + if (fd < 0) { > > + perror(argv[1]); > > + return 1; > > + } > > + > > + ret = fstat(fd, &sb); > > + if (ret) { > > + perror(argv[1]); > > + return 1; > > + } > > + > > + buf = malloc(sb.st_blksize); > > + if (!buf) { > > + perror("pread buffer"); > > + return 1; > > + } > > + > > + zeroes = calloc(1, sb.st_blksize); > > + if (!zeroes) { > > + perror("zeroes buffer"); > > + return 1; > > + } > > + > > + for (i = 1; i <= iterations; i++) { > > + struct xfs_flock64 arg = { }; > > + ssize_t read_bytes; > > + off_t offset = sb.st_blksize * i; > > + > > + /* Ensure the last block of the file is a hole... */ > > + ret = ftruncate(fd, offset - 1); > > + if (ret) { > > + perror("truncate"); > > + return 1; > > + } > > + > > + /* > > + * ...then use ALLOCSP to allocate the last block in the file. > > + * An unpatched kernel neglects to mark the new mapping > > + * unwritten or to zero the ondisk block, so... > > + */ > > + arg.l_whence = SEEK_SET; > > + arg.l_start = offset; > > + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); > > + if (ret < 0) { > > + perror("ioctl"); > > + return 1; > > + } > > + > > + /* ... we can read old disk contents here. */ > > + read_bytes = pread(fd, buf, sb.st_blksize, > > + offset - sb.st_blksize); > > + if (read_bytes < 0) { > > + perror(argv[1]); > > + return 1; > > + } > > + if (read_bytes != sb.st_blksize) { > > + fprintf(stderr, "%s: short read of %zd bytes\n", > > + argv[1], read_bytes); > > + return 1; > > + } > > + > > + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { > > + fprintf(stderr, "%s: found junk near offset %zd!\n", > > + argv[1], offset - sb.st_blksize); > > + return 2; > > + } > > + } > > + > > + return 0; > > +} > > diff --git a/tests/xfs/832 b/tests/xfs/832 > > new file mode 100755 > > index 00000000..3820ff8c > > --- /dev/null > > +++ b/tests/xfs/832 > > @@ -0,0 +1,56 @@ > > +#! /bin/bash > > +# SPDX-License-Identifier: GPL-2.0 > > +# Copyright (c) 2022 Oracle. All Rights Reserved. > > +# > > +# FS QA Test 832 > > +# > > +# Regression test for commit: > > +# > > +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") > > +# > > +. ./common/preamble > > +_begin_fstest auto quick prealloc > > + > > +# Import common functions. > > +. ./common/filter > > + > > +# real QA test starts here > > + > > +# Modify as appropriate. > > +_supported_fs xfs > > +_require_test > > +_require_scratch > > + > > +size_mb=32 > > +# Write a known pattern to the disk so that we can detect stale disk blocks > > +# being mapped into the file. In the test author's experience, the bug will > > +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up > > +# to 16MB should suffice. > > +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full > > +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full > > + > > +_scratch_mount > > + > > +# Force the file to be created on the data device, which we pre-initialized > > +# with a known pattern. The bug exists in the generic bmap code, so the choice > > +# of backing device does not matter, and ignoring the rt device gets us out of > > +# needing to detect things like rt extent size. > > +_xfs_force_bdev data $SCRATCH_MNT > > +testfile=$SCRATCH_MNT/a > > + > > +# Allow the test program to expand the file to consume half the free space. > > +blksz=$(_get_file_block_size $SCRATCH_MNT) > > +iterations=$(( (size_mb / 2) * 1048576 / blksz)) > > +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full > > + > > +# Run reproducer program and dump file contents if we see stale data. Full > > +# details are in the source for the C program, but in a nutshell we run ALLOCSP > > +# one block at a time to see if it'll give us blocks full of 'X'es. > > +$here/src/allocstale $testfile $iterations > > +res=$? > > +test $res -eq 2 && od -tx1 -Ad -c $testfile > > + > > +# success, all done > > +echo Silence is golden > > +status=0 > > +exit > > diff --git a/tests/xfs/832.out b/tests/xfs/832.out > > new file mode 100644 > > index 00000000..bb8a6c12 > > --- /dev/null > > +++ b/tests/xfs/832.out > > @@ -0,0 +1,2 @@ > > +QA output created by 832 > > +Silence is golden > > >
On Tue, Jan 18, 2022 at 10:09:11AM -0800, Darrick J. Wong wrote: > On Tue, Jan 18, 2022 at 11:37:35AM +0800, Zorro Lang wrote: > > On Tue, Jan 11, 2022 at 01:50:35PM -0800, Darrick J. Wong wrote: > > > From: Darrick J. Wong <djwong@kernel.org> > > > > > > Add a regression test to check that XFS_IOC_ALLOCSP isn't handing out > > > stale disk blocks for preallocation. > > > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > > --- > > > > Hi Darrick, > > > > This case is easily failed on some multi-striped storage[1]. The full output > > as [2], the out.bad output as [3]. > > > > Thanks, > > Zorro > > > > [1] > > FSTYP -- xfs (non-debug) > > PLATFORM -- Linux/x86_64 xxx-xxxxx-xx 5.14.0-xxx #1 SMP PREEMPT Fri Jan 14 09:24:44 UTC 2022 > > MKFS_OPTIONS -- -f -m crc=1,finobt=1,rmapbt=0,reflink=1,bigtime=1,inobtcount=1 -i sparse=1 /dev/sda4 > > MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/xfstests/scratch > > > > xfs/832 _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > > (see /var/lib/xfstests/results//xfs/832.full for details) > > - output mismatch (see /var/lib/xfstests/results//xfs/832.out.bad) > > --- tests/xfs/832.out 2022-01-15 22:18:35.175245791 -0500 > > +++ /var/lib/xfstests/results//xfs/832.out.bad 2022-01-15 22:20:46.630697627 -0500 > > @@ -1,2 +1,3 @@ > > QA output created by 832 > > +ioctl: No space left on device > > Silence is golden > > ... > > (Run 'diff -u /var/lib/xfstests/tests/xfs/832.out /var/lib/xfstests/results//xfs/832.out.bad' to see the entire diff) > > Ran: xfs/832 > > Failures: xfs/832 > > Failed 1 of 1 tests > > > > [2] > > wrote 33554432/33554432 bytes at offset 0 > > 32 MiB, 4 ops; 0.1368 sec (233.848 MiB/sec and 29.2310 ops/sec) > > meta-data=/dev/sda4 isize=512 agcount=1, agsize=4112 blks > > Single AG filesystems aren't a supported configuration. Is sda4 > actually 16MB? I'm a little surprised that this was the outcome of > "_scratch_mkfs_sized 33554432" on line 32. Sure, I'll try to reserve that test machine, to figure out if SCRATCH_DEV is 16MB. BTW some other testing hit log size too small error[1], the SCRATCH_DEV information as [2]. And it can be reproduced manually by [3]. Thanks, Zorro [1] # cat results//xfs/832.out.bad QA output created by 832 log size 4085 blocks too small, minimum size is 4608 blocks ... [2] # mkfs.xfs -f $SCRATCH_DEV meta-data=/dev/mapper/rhel8_hp--xfscratch isize=512 agcount=16, agsize=8192000 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=0 = reflink=1 bigtime=0 inobtcount=0 data = bsize=4096 blocks=131072000, imaxpct=25 = sunit=64 swidth=64 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=64000, version=2 = sectsz=512 sunit=64 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 [3] # mkfs.xfs -f -d size=33554432 /dev/mapper/rhel_hp--dl380pg8--01-xfscratch log size 4085 blocks too small, minimum size is 4608 blocks Usage: mkfs.xfs /* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1, inobtcount=0|1,bigtime=0|1] ... > > --D > > > = sectsz=512 attr=2, projid32bit=1 > > = crc=1 finobt=1, sparse=1, rmapbt=0 > > = reflink=1 bigtime=1 inobtcount=1 > > data = bsize=4096 blocks=4112, imaxpct=25 > > = sunit=16 swidth=32 blks > > naming =version 2 bsize=4096 ascii-ci=0, ftype=1 > > log =internal log bsize=4096 blocks=1872, version=2 > > = sectsz=512 sunit=16 blks, lazy-count=1 > > realtime =none extsz=4096 blocks=0, rtextents=0 > > Setting up 4096 runs for block size 4096 > > _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > > *** xfs_repair -n output *** > > Phase 1 - find and verify superblock... > > Only one AG detected - cannot validate filesystem geometry. > > Use the -o force_geometry option to proceed. > > *** end xfs_repair output > > ... > > > > [3] > > QA output created by 832 > > ioctl: No space left on device > > Silence is golden > > > > > > > .gitignore | 1 > > > src/Makefile | 2 - > > > src/allocstale.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > > > tests/xfs/832 | 56 +++++++++++++++++++++++++ > > > tests/xfs/832.out | 2 + > > > 5 files changed, 177 insertions(+), 1 deletion(-) > > > create mode 100644 src/allocstale.c > > > create mode 100755 tests/xfs/832 > > > create mode 100644 tests/xfs/832.out > > > > > > > > > diff --git a/.gitignore b/.gitignore > > > index 65b93307..ba0c572b 100644 > > > --- a/.gitignore > > > +++ b/.gitignore > > > @@ -56,6 +56,7 @@ tags > > > # src/ binaries > > > /src/af_unix > > > /src/alloc > > > +/src/allocstale > > > /src/append_reader > > > /src/append_writer > > > /src/attr_replace_test > > > diff --git a/src/Makefile b/src/Makefile > > > index 1737ed0e..111ce1d9 100644 > > > --- a/src/Makefile > > > +++ b/src/Makefile > > > @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > > t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ > > > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > > - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault > > > + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale > > > > > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > > diff --git a/src/allocstale.c b/src/allocstale.c > > > new file mode 100644 > > > index 00000000..6253fe4c > > > --- /dev/null > > > +++ b/src/allocstale.c > > > @@ -0,0 +1,117 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +/* > > > + * Copyright (C) 2022 Oracle. All Rights Reserved. > > > + * Author: Darrick J. Wong <djwong@kernel.org> > > > + * > > > + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks > > > + * into a file. > > > + */ > > > +#include <xfs/xfs.h> > > > +#include <stdlib.h> > > > +#include <stdio.h> > > > +#include <unistd.h> > > > +#include <sys/types.h> > > > +#include <sys/stat.h> > > > +#include <fcntl.h> > > > +#include <errno.h> > > > +#include <unistd.h> > > > +#include <string.h> > > > + > > > +#ifndef XFS_IOC_ALLOCSP > > > +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) > > > +#endif > > > + > > > +int > > > +main( > > > + int argc, > > > + char *argv[]) > > > +{ > > > + struct stat sb; > > > + char *buf, *zeroes; > > > + unsigned long i; > > > + unsigned long iterations; > > > + int fd, ret; > > > + > > > + if (argc != 3) { > > > + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); > > > + return 1; > > > + } > > > + > > > + errno = 0; > > > + iterations = strtoul(argv[2], NULL, 0); > > > + if (errno) { > > > + perror(argv[2]); > > > + return 1; > > > + } > > > + > > > + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); > > > + if (fd < 0) { > > > + perror(argv[1]); > > > + return 1; > > > + } > > > + > > > + ret = fstat(fd, &sb); > > > + if (ret) { > > > + perror(argv[1]); > > > + return 1; > > > + } > > > + > > > + buf = malloc(sb.st_blksize); > > > + if (!buf) { > > > + perror("pread buffer"); > > > + return 1; > > > + } > > > + > > > + zeroes = calloc(1, sb.st_blksize); > > > + if (!zeroes) { > > > + perror("zeroes buffer"); > > > + return 1; > > > + } > > > + > > > + for (i = 1; i <= iterations; i++) { > > > + struct xfs_flock64 arg = { }; > > > + ssize_t read_bytes; > > > + off_t offset = sb.st_blksize * i; > > > + > > > + /* Ensure the last block of the file is a hole... */ > > > + ret = ftruncate(fd, offset - 1); > > > + if (ret) { > > > + perror("truncate"); > > > + return 1; > > > + } > > > + > > > + /* > > > + * ...then use ALLOCSP to allocate the last block in the file. > > > + * An unpatched kernel neglects to mark the new mapping > > > + * unwritten or to zero the ondisk block, so... > > > + */ > > > + arg.l_whence = SEEK_SET; > > > + arg.l_start = offset; > > > + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); > > > + if (ret < 0) { > > > + perror("ioctl"); > > > + return 1; > > > + } > > > + > > > + /* ... we can read old disk contents here. */ > > > + read_bytes = pread(fd, buf, sb.st_blksize, > > > + offset - sb.st_blksize); > > > + if (read_bytes < 0) { > > > + perror(argv[1]); > > > + return 1; > > > + } > > > + if (read_bytes != sb.st_blksize) { > > > + fprintf(stderr, "%s: short read of %zd bytes\n", > > > + argv[1], read_bytes); > > > + return 1; > > > + } > > > + > > > + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { > > > + fprintf(stderr, "%s: found junk near offset %zd!\n", > > > + argv[1], offset - sb.st_blksize); > > > + return 2; > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > diff --git a/tests/xfs/832 b/tests/xfs/832 > > > new file mode 100755 > > > index 00000000..3820ff8c > > > --- /dev/null > > > +++ b/tests/xfs/832 > > > @@ -0,0 +1,56 @@ > > > +#! /bin/bash > > > +# SPDX-License-Identifier: GPL-2.0 > > > +# Copyright (c) 2022 Oracle. All Rights Reserved. > > > +# > > > +# FS QA Test 832 > > > +# > > > +# Regression test for commit: > > > +# > > > +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") > > > +# > > > +. ./common/preamble > > > +_begin_fstest auto quick prealloc > > > + > > > +# Import common functions. > > > +. ./common/filter > > > + > > > +# real QA test starts here > > > + > > > +# Modify as appropriate. > > > +_supported_fs xfs > > > +_require_test > > > +_require_scratch > > > + > > > +size_mb=32 > > > +# Write a known pattern to the disk so that we can detect stale disk blocks > > > +# being mapped into the file. In the test author's experience, the bug will > > > +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up > > > +# to 16MB should suffice. > > > +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full > > > +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full > > > + > > > +_scratch_mount > > > + > > > +# Force the file to be created on the data device, which we pre-initialized > > > +# with a known pattern. The bug exists in the generic bmap code, so the choice > > > +# of backing device does not matter, and ignoring the rt device gets us out of > > > +# needing to detect things like rt extent size. > > > +_xfs_force_bdev data $SCRATCH_MNT > > > +testfile=$SCRATCH_MNT/a > > > + > > > +# Allow the test program to expand the file to consume half the free space. > > > +blksz=$(_get_file_block_size $SCRATCH_MNT) > > > +iterations=$(( (size_mb / 2) * 1048576 / blksz)) > > > +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full > > > + > > > +# Run reproducer program and dump file contents if we see stale data. Full > > > +# details are in the source for the C program, but in a nutshell we run ALLOCSP > > > +# one block at a time to see if it'll give us blocks full of 'X'es. > > > +$here/src/allocstale $testfile $iterations > > > +res=$? > > > +test $res -eq 2 && od -tx1 -Ad -c $testfile > > > + > > > +# success, all done > > > +echo Silence is golden > > > +status=0 > > > +exit > > > diff --git a/tests/xfs/832.out b/tests/xfs/832.out > > > new file mode 100644 > > > index 00000000..bb8a6c12 > > > --- /dev/null > > > +++ b/tests/xfs/832.out > > > @@ -0,0 +1,2 @@ > > > +QA output created by 832 > > > +Silence is golden > > > > > >
On Wed, Jan 19, 2022 at 12:07:06PM +0800, Zorro Lang wrote: > On Tue, Jan 18, 2022 at 10:09:11AM -0800, Darrick J. Wong wrote: > > On Tue, Jan 18, 2022 at 11:37:35AM +0800, Zorro Lang wrote: > > > On Tue, Jan 11, 2022 at 01:50:35PM -0800, Darrick J. Wong wrote: > > > > From: Darrick J. Wong <djwong@kernel.org> > > > > > > > > Add a regression test to check that XFS_IOC_ALLOCSP isn't handing out > > > > stale disk blocks for preallocation. > > > > > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > > > --- > > > > > > Hi Darrick, > > > > > > This case is easily failed on some multi-striped storage[1]. The full output > > > as [2], the out.bad output as [3]. > > > > > > Thanks, > > > Zorro > > > > > > [1] > > > FSTYP -- xfs (non-debug) > > > PLATFORM -- Linux/x86_64 xxx-xxxxx-xx 5.14.0-xxx #1 SMP PREEMPT Fri Jan 14 09:24:44 UTC 2022 > > > MKFS_OPTIONS -- -f -m crc=1,finobt=1,rmapbt=0,reflink=1,bigtime=1,inobtcount=1 -i sparse=1 /dev/sda4 > > > MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/xfstests/scratch > > > > > > xfs/832 _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > > > (see /var/lib/xfstests/results//xfs/832.full for details) > > > - output mismatch (see /var/lib/xfstests/results//xfs/832.out.bad) > > > --- tests/xfs/832.out 2022-01-15 22:18:35.175245791 -0500 > > > +++ /var/lib/xfstests/results//xfs/832.out.bad 2022-01-15 22:20:46.630697627 -0500 > > > @@ -1,2 +1,3 @@ > > > QA output created by 832 > > > +ioctl: No space left on device > > > Silence is golden > > > ... > > > (Run 'diff -u /var/lib/xfstests/tests/xfs/832.out /var/lib/xfstests/results//xfs/832.out.bad' to see the entire diff) > > > Ran: xfs/832 > > > Failures: xfs/832 > > > Failed 1 of 1 tests > > > > > > [2] > > > wrote 33554432/33554432 bytes at offset 0 > > > 32 MiB, 4 ops; 0.1368 sec (233.848 MiB/sec and 29.2310 ops/sec) > > > meta-data=/dev/sda4 isize=512 agcount=1, agsize=4112 blks > > > > Single AG filesystems aren't a supported configuration. Is sda4 > > actually 16MB? I'm a little surprised that this was the outcome of > > "_scratch_mkfs_sized 33554432" on line 32. > > Sure, I'll try to reserve that test machine, to figure out if SCRATCH_DEV > is 16MB. > > BTW some other testing hit log size too small error[1], the SCRATCH_DEV > information as [2]. And it can be reproduced manually by [3]. > > Thanks, > Zorro > > [1] > # cat results//xfs/832.out.bad > QA output created by 832 > log size 4085 blocks too small, minimum size is 4608 blocks > ... > > [2] > # mkfs.xfs -f $SCRATCH_DEV > meta-data=/dev/mapper/rhel8_hp--xfscratch isize=512 agcount=16, agsize=8192000 blks > = sectsz=512 attr=2, projid32bit=1 > = crc=1 finobt=1, sparse=1, rmapbt=0 > = reflink=1 bigtime=0 inobtcount=0 > data = bsize=4096 blocks=131072000, imaxpct=25 > = sunit=64 swidth=64 blks > naming =version 2 bsize=4096 ascii-ci=0, ftype=1 > log =internal log bsize=4096 blocks=64000, version=2 > = sectsz=512 sunit=64 blks, lazy-count=1 > realtime =none extsz=4096 blocks=0, rtextents=0 > > [3] > # mkfs.xfs -f -d size=33554432 /dev/mapper/rhel_hp--dl380pg8--01-xfscratch > log size 4085 blocks too small, minimum size is 4608 blocks Echoing a conversation we had on IRC: I think I'll adjust this test to write 256M to the scratch device and create a 256M filesystem to try to avoid all these "fs too small" issues. --D > Usage: mkfs.xfs > /* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1, > inobtcount=0|1,bigtime=0|1] > ... > > > > > --D > > > > > = sectsz=512 attr=2, projid32bit=1 > > > = crc=1 finobt=1, sparse=1, rmapbt=0 > > > = reflink=1 bigtime=1 inobtcount=1 > > > data = bsize=4096 blocks=4112, imaxpct=25 > > > = sunit=16 swidth=32 blks > > > naming =version 2 bsize=4096 ascii-ci=0, ftype=1 > > > log =internal log bsize=4096 blocks=1872, version=2 > > > = sectsz=512 sunit=16 blks, lazy-count=1 > > > realtime =none extsz=4096 blocks=0, rtextents=0 > > > Setting up 4096 runs for block size 4096 > > > _check_xfs_filesystem: filesystem on /dev/sda4 is inconsistent (r) > > > *** xfs_repair -n output *** > > > Phase 1 - find and verify superblock... > > > Only one AG detected - cannot validate filesystem geometry. > > > Use the -o force_geometry option to proceed. > > > *** end xfs_repair output > > > ... > > > > > > [3] > > > QA output created by 832 > > > ioctl: No space left on device > > > Silence is golden > > > > > > > > > > .gitignore | 1 > > > > src/Makefile | 2 - > > > > src/allocstale.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > > > > tests/xfs/832 | 56 +++++++++++++++++++++++++ > > > > tests/xfs/832.out | 2 + > > > > 5 files changed, 177 insertions(+), 1 deletion(-) > > > > create mode 100644 src/allocstale.c > > > > create mode 100755 tests/xfs/832 > > > > create mode 100644 tests/xfs/832.out > > > > > > > > > > > > diff --git a/.gitignore b/.gitignore > > > > index 65b93307..ba0c572b 100644 > > > > --- a/.gitignore > > > > +++ b/.gitignore > > > > @@ -56,6 +56,7 @@ tags > > > > # src/ binaries > > > > /src/af_unix > > > > /src/alloc > > > > +/src/allocstale > > > > /src/append_reader > > > > /src/append_writer > > > > /src/attr_replace_test > > > > diff --git a/src/Makefile b/src/Makefile > > > > index 1737ed0e..111ce1d9 100644 > > > > --- a/src/Makefile > > > > +++ b/src/Makefile > > > > @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > > > t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ > > > > t_ofd_locks t_mmap_collision mmap-write-concurrent \ > > > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > > > - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault > > > > + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale > > > > > > > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > > > diff --git a/src/allocstale.c b/src/allocstale.c > > > > new file mode 100644 > > > > index 00000000..6253fe4c > > > > --- /dev/null > > > > +++ b/src/allocstale.c > > > > @@ -0,0 +1,117 @@ > > > > +// SPDX-License-Identifier: GPL-2.0 > > > > +/* > > > > + * Copyright (C) 2022 Oracle. All Rights Reserved. > > > > + * Author: Darrick J. Wong <djwong@kernel.org> > > > > + * > > > > + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks > > > > + * into a file. > > > > + */ > > > > +#include <xfs/xfs.h> > > > > +#include <stdlib.h> > > > > +#include <stdio.h> > > > > +#include <unistd.h> > > > > +#include <sys/types.h> > > > > +#include <sys/stat.h> > > > > +#include <fcntl.h> > > > > +#include <errno.h> > > > > +#include <unistd.h> > > > > +#include <string.h> > > > > + > > > > +#ifndef XFS_IOC_ALLOCSP > > > > +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) > > > > +#endif > > > > + > > > > +int > > > > +main( > > > > + int argc, > > > > + char *argv[]) > > > > +{ > > > > + struct stat sb; > > > > + char *buf, *zeroes; > > > > + unsigned long i; > > > > + unsigned long iterations; > > > > + int fd, ret; > > > > + > > > > + if (argc != 3) { > > > > + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); > > > > + return 1; > > > > + } > > > > + > > > > + errno = 0; > > > > + iterations = strtoul(argv[2], NULL, 0); > > > > + if (errno) { > > > > + perror(argv[2]); > > > > + return 1; > > > > + } > > > > + > > > > + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); > > > > + if (fd < 0) { > > > > + perror(argv[1]); > > > > + return 1; > > > > + } > > > > + > > > > + ret = fstat(fd, &sb); > > > > + if (ret) { > > > > + perror(argv[1]); > > > > + return 1; > > > > + } > > > > + > > > > + buf = malloc(sb.st_blksize); > > > > + if (!buf) { > > > > + perror("pread buffer"); > > > > + return 1; > > > > + } > > > > + > > > > + zeroes = calloc(1, sb.st_blksize); > > > > + if (!zeroes) { > > > > + perror("zeroes buffer"); > > > > + return 1; > > > > + } > > > > + > > > > + for (i = 1; i <= iterations; i++) { > > > > + struct xfs_flock64 arg = { }; > > > > + ssize_t read_bytes; > > > > + off_t offset = sb.st_blksize * i; > > > > + > > > > + /* Ensure the last block of the file is a hole... */ > > > > + ret = ftruncate(fd, offset - 1); > > > > + if (ret) { > > > > + perror("truncate"); > > > > + return 1; > > > > + } > > > > + > > > > + /* > > > > + * ...then use ALLOCSP to allocate the last block in the file. > > > > + * An unpatched kernel neglects to mark the new mapping > > > > + * unwritten or to zero the ondisk block, so... > > > > + */ > > > > + arg.l_whence = SEEK_SET; > > > > + arg.l_start = offset; > > > > + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); > > > > + if (ret < 0) { > > > > + perror("ioctl"); > > > > + return 1; > > > > + } > > > > + > > > > + /* ... we can read old disk contents here. */ > > > > + read_bytes = pread(fd, buf, sb.st_blksize, > > > > + offset - sb.st_blksize); > > > > + if (read_bytes < 0) { > > > > + perror(argv[1]); > > > > + return 1; > > > > + } > > > > + if (read_bytes != sb.st_blksize) { > > > > + fprintf(stderr, "%s: short read of %zd bytes\n", > > > > + argv[1], read_bytes); > > > > + return 1; > > > > + } > > > > + > > > > + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { > > > > + fprintf(stderr, "%s: found junk near offset %zd!\n", > > > > + argv[1], offset - sb.st_blksize); > > > > + return 2; > > > > + } > > > > + } > > > > + > > > > + return 0; > > > > +} > > > > diff --git a/tests/xfs/832 b/tests/xfs/832 > > > > new file mode 100755 > > > > index 00000000..3820ff8c > > > > --- /dev/null > > > > +++ b/tests/xfs/832 > > > > @@ -0,0 +1,56 @@ > > > > +#! /bin/bash > > > > +# SPDX-License-Identifier: GPL-2.0 > > > > +# Copyright (c) 2022 Oracle. All Rights Reserved. > > > > +# > > > > +# FS QA Test 832 > > > > +# > > > > +# Regression test for commit: > > > > +# > > > > +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") > > > > +# > > > > +. ./common/preamble > > > > +_begin_fstest auto quick prealloc > > > > + > > > > +# Import common functions. > > > > +. ./common/filter > > > > + > > > > +# real QA test starts here > > > > + > > > > +# Modify as appropriate. > > > > +_supported_fs xfs > > > > +_require_test > > > > +_require_scratch > > > > + > > > > +size_mb=32 > > > > +# Write a known pattern to the disk so that we can detect stale disk blocks > > > > +# being mapped into the file. In the test author's experience, the bug will > > > > +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up > > > > +# to 16MB should suffice. > > > > +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full > > > > +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full > > > > + > > > > +_scratch_mount > > > > + > > > > +# Force the file to be created on the data device, which we pre-initialized > > > > +# with a known pattern. The bug exists in the generic bmap code, so the choice > > > > +# of backing device does not matter, and ignoring the rt device gets us out of > > > > +# needing to detect things like rt extent size. > > > > +_xfs_force_bdev data $SCRATCH_MNT > > > > +testfile=$SCRATCH_MNT/a > > > > + > > > > +# Allow the test program to expand the file to consume half the free space. > > > > +blksz=$(_get_file_block_size $SCRATCH_MNT) > > > > +iterations=$(( (size_mb / 2) * 1048576 / blksz)) > > > > +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full > > > > + > > > > +# Run reproducer program and dump file contents if we see stale data. Full > > > > +# details are in the source for the C program, but in a nutshell we run ALLOCSP > > > > +# one block at a time to see if it'll give us blocks full of 'X'es. > > > > +$here/src/allocstale $testfile $iterations > > > > +res=$? > > > > +test $res -eq 2 && od -tx1 -Ad -c $testfile > > > > + > > > > +# success, all done > > > > +echo Silence is golden > > > > +status=0 > > > > +exit > > > > diff --git a/tests/xfs/832.out b/tests/xfs/832.out > > > > new file mode 100644 > > > > index 00000000..bb8a6c12 > > > > --- /dev/null > > > > +++ b/tests/xfs/832.out > > > > @@ -0,0 +1,2 @@ > > > > +QA output created by 832 > > > > +Silence is golden > > > > > > > > > >
diff --git a/.gitignore b/.gitignore index 65b93307..ba0c572b 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ tags # src/ binaries /src/af_unix /src/alloc +/src/allocstale /src/append_reader /src/append_writer /src/attr_replace_test diff --git a/src/Makefile b/src/Makefile index 1737ed0e..111ce1d9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \ t_ofd_locks t_mmap_collision mmap-write-concurrent \ t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ - t_mmap_writev_overlap checkpoint_journal mmap-rw-fault + t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ diff --git a/src/allocstale.c b/src/allocstale.c new file mode 100644 index 00000000..6253fe4c --- /dev/null +++ b/src/allocstale.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + * + * Test program to try to trip over XFS_IOC_ALLOCSP mapping stale disk blocks + * into a file. + */ +#include <xfs/xfs.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#ifndef XFS_IOC_ALLOCSP +# define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) +#endif + +int +main( + int argc, + char *argv[]) +{ + struct stat sb; + char *buf, *zeroes; + unsigned long i; + unsigned long iterations; + int fd, ret; + + if (argc != 3) { + fprintf(stderr, "Usage: %s filename iterations\n", argv[0]); + return 1; + } + + errno = 0; + iterations = strtoul(argv[2], NULL, 0); + if (errno) { + perror(argv[2]); + return 1; + } + + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + perror(argv[1]); + return 1; + } + + ret = fstat(fd, &sb); + if (ret) { + perror(argv[1]); + return 1; + } + + buf = malloc(sb.st_blksize); + if (!buf) { + perror("pread buffer"); + return 1; + } + + zeroes = calloc(1, sb.st_blksize); + if (!zeroes) { + perror("zeroes buffer"); + return 1; + } + + for (i = 1; i <= iterations; i++) { + struct xfs_flock64 arg = { }; + ssize_t read_bytes; + off_t offset = sb.st_blksize * i; + + /* Ensure the last block of the file is a hole... */ + ret = ftruncate(fd, offset - 1); + if (ret) { + perror("truncate"); + return 1; + } + + /* + * ...then use ALLOCSP to allocate the last block in the file. + * An unpatched kernel neglects to mark the new mapping + * unwritten or to zero the ondisk block, so... + */ + arg.l_whence = SEEK_SET; + arg.l_start = offset; + ret = ioctl(fd, XFS_IOC_ALLOCSP, &arg); + if (ret < 0) { + perror("ioctl"); + return 1; + } + + /* ... we can read old disk contents here. */ + read_bytes = pread(fd, buf, sb.st_blksize, + offset - sb.st_blksize); + if (read_bytes < 0) { + perror(argv[1]); + return 1; + } + if (read_bytes != sb.st_blksize) { + fprintf(stderr, "%s: short read of %zd bytes\n", + argv[1], read_bytes); + return 1; + } + + if (memcmp(zeroes, buf, sb.st_blksize) != 0) { + fprintf(stderr, "%s: found junk near offset %zd!\n", + argv[1], offset - sb.st_blksize); + return 2; + } + } + + return 0; +} diff --git a/tests/xfs/832 b/tests/xfs/832 new file mode 100755 index 00000000..3820ff8c --- /dev/null +++ b/tests/xfs/832 @@ -0,0 +1,56 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022 Oracle. All Rights Reserved. +# +# FS QA Test 832 +# +# Regression test for commit: +# +# 983d8e60f508 ("xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate") +# +. ./common/preamble +_begin_fstest auto quick prealloc + +# Import common functions. +. ./common/filter + +# real QA test starts here + +# Modify as appropriate. +_supported_fs xfs +_require_test +_require_scratch + +size_mb=32 +# Write a known pattern to the disk so that we can detect stale disk blocks +# being mapped into the file. In the test author's experience, the bug will +# reproduce within the first 500KB's worth of ALLOCSP calls, so running up +# to 16MB should suffice. +$XFS_IO_PROG -d -c "pwrite -S 0x58 -b 8m 0 ${size_mb}m" $SCRATCH_DEV > $seqres.full +MKFS_OPTIONS="-K $MKFS_OPTIONS" _scratch_mkfs_sized $((size_mb * 1048576)) >> $seqres.full + +_scratch_mount + +# Force the file to be created on the data device, which we pre-initialized +# with a known pattern. The bug exists in the generic bmap code, so the choice +# of backing device does not matter, and ignoring the rt device gets us out of +# needing to detect things like rt extent size. +_xfs_force_bdev data $SCRATCH_MNT +testfile=$SCRATCH_MNT/a + +# Allow the test program to expand the file to consume half the free space. +blksz=$(_get_file_block_size $SCRATCH_MNT) +iterations=$(( (size_mb / 2) * 1048576 / blksz)) +echo "Setting up $iterations runs for block size $blksz" >> $seqres.full + +# Run reproducer program and dump file contents if we see stale data. Full +# details are in the source for the C program, but in a nutshell we run ALLOCSP +# one block at a time to see if it'll give us blocks full of 'X'es. +$here/src/allocstale $testfile $iterations +res=$? +test $res -eq 2 && od -tx1 -Ad -c $testfile + +# success, all done +echo Silence is golden +status=0 +exit diff --git a/tests/xfs/832.out b/tests/xfs/832.out new file mode 100644 index 00000000..bb8a6c12 --- /dev/null +++ b/tests/xfs/832.out @@ -0,0 +1,2 @@ +QA output created by 832 +Silence is golden