Message ID | 652ec55049e94a59f66f4112fb8707629db3001d.1722008942.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | generic: test page fault during direct IO write with O_APPEND | expand |
On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote: > From: Filipe Manana <fdmanana@suse.com> > > Test that doing a direct IO append write to a file when the input buffer > was not yet faulted in, does not result in an incorrect file size. > > This exercises a bug on btrfs reported by users and which is fixed by > the following kernel patch: > > "btrfs: fix corruption after buffer fault in during direct IO append write" > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > --- > .gitignore | 1 + > src/Makefile | 2 +- > src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++ > tests/generic/362 | 28 ++++++++ > tests/generic/362.out | 2 + > 5 files changed, 163 insertions(+), 1 deletion(-) > create mode 100644 src/dio-append-buf-fault.c > create mode 100755 tests/generic/362 > create mode 100644 tests/generic/362.out > > diff --git a/.gitignore b/.gitignore > index b5f15162..97c7e001 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -72,6 +72,7 @@ tags > /src/deduperace > /src/detached_mounts_propagation > /src/devzero > +/src/dio-append-buf-fault > /src/dio-buf-fault > /src/dio-interleaved > /src/dio-invalidate-cache > diff --git a/src/Makefile b/src/Makefile > index 99796137..559209be 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ > t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \ > - readdir-while-renames > + readdir-while-renames dio-append-buf-fault > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c > new file mode 100644 > index 00000000..f4be4845 > --- /dev/null > +++ b/src/dio-append-buf-fault.c > @@ -0,0 +1,131 @@ > +// 
SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2024 SUSE Linux Products GmbH. All Rights Reserved. > + */ > + > +/* > + * Test a direct IO write in append mode with a buffer that was not faulted in > + * (or just partially) before the write. > + */ > + > +/* Get the O_DIRECT definition. */ > +#ifndef _GNU_SOURCE > +#define _GNU_SOURCE > +#endif > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <stdint.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <string.h> > +#include <sys/mman.h> > +#include <sys/stat.h> > + > +int main(int argc, char *argv[]) > +{ > + struct stat stbuf; > + int fd; > + long pagesize; > + void *buf; > + ssize_t ret; > + > + if (argc != 2) { > + fprintf(stderr, "Use: %s <file path>\n", argv[0]); > + return 1; > + } > + > + /* > + * First try an append write against an empty file of a buffer with a > + * size matching the page size. The buffer is not faulted in before > + * attempting the write. > + */ > + > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > + if (fd == -1) { > + perror("Failed to open/create file"); > + return 2; > + } > + > + pagesize = sysconf(_SC_PAGE_SIZE); > + if (pagesize == -1) { > + perror("Failed to get page size"); > + return 3; > + } > + > + buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > + if (buf == MAP_FAILED) { > + perror("Failed to allocate first buffer"); > + return 4; > + } > + > + ret = write(fd, buf, pagesize); > + if (ret < 0) { > + perror("First write failed"); > + return 5; > + } > + > + ret = fstat(fd, &stbuf); > + if (ret < 0) { > + perror("First stat failed"); > + return 6; > + } > + > + if (stbuf.st_size != pagesize) { > + fprintf(stderr, > + "Wrong file size after first write, got %jd expected %ld\n", > + (intmax_t)stbuf.st_size, pagesize); > + return 7; > + } > + > + munmap(buf, pagesize); > + close(fd); > + > + /* > + * Now try an append write against an empty file of 
a buffer with a > + * size matching twice the page size. Only the first page of the buffer > + * is faulted in before attempting the write, so that the second page > + * should be faulted in during the write. > + */ > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > + if (fd == -1) { > + perror("Failed to open/create file"); > + return 8; > + } > + > + buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > + if (buf == MAP_FAILED) { > + perror("Failed to allocate second buffer"); > + return 9; > + } > + > + /* Fault in first page of the buffer before the write. */ > + memset(buf, 0, 1); > + > + ret = write(fd, buf, pagesize * 2); > + if (ret < 0) { > + perror("Second write failed"); Hi Filipe, This patch looks good to me, just a question about this part. Is it possible to get (0 < ret < pagesize * 2) here? If so, should we report a failure too? > + return 10; > + } > + > + ret = fstat(fd, &stbuf); > + if (ret < 0) { > + perror("Second stat failed"); > + return 11; > + } > + > + if (stbuf.st_size != pagesize * 2) { > + fprintf(stderr, > + "Wrong file size after second write, got %jd expected %ld\n", > + (intmax_t)stbuf.st_size, pagesize * 2); Is this trying to check that stbuf.st_size isn't equal to the write(2) return value? Or does it check stbuf.st_size != pagesize * 2 when the return value is good (equal to pagesize * 2)? Thanks, Zorro > + return 12; > + } > + > + munmap(buf, pagesize * 2); > + close(fd); > + > + return 0; > +} [snip]
On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote: > > On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote: > > From: Filipe Manana <fdmanana@suse.com> > > > > Test that doing a direct IO append write to a file when the input buffer > > was not yet faulted in, does not result in an incorrect file size. > > > > This exercises a bug on btrfs reported by users and which is fixed by > > the following kernel patch: > > > > "btrfs: fix corruption after buffer fault in during direct IO append write" > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > --- > > .gitignore | 1 + > > src/Makefile | 2 +- > > src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++ > > tests/generic/362 | 28 ++++++++ > > tests/generic/362.out | 2 + > > 5 files changed, 163 insertions(+), 1 deletion(-) > > create mode 100644 src/dio-append-buf-fault.c > > create mode 100755 tests/generic/362 > > create mode 100644 tests/generic/362.out > > > > diff --git a/.gitignore b/.gitignore > > index b5f15162..97c7e001 100644 > > --- a/.gitignore > > +++ b/.gitignore > > @@ -72,6 +72,7 @@ tags > > /src/deduperace > > /src/detached_mounts_propagation > > /src/devzero > > +/src/dio-append-buf-fault > > /src/dio-buf-fault > > /src/dio-interleaved > > /src/dio-invalidate-cache > > diff --git a/src/Makefile b/src/Makefile > > index 99796137..559209be 100644 > > --- a/src/Makefile > > +++ b/src/Makefile > > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ > > t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \ > > - readdir-while-renames > > + readdir-while-renames dio-append-buf-fault > > > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > diff --git 
a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c > > new file mode 100644 > > index 00000000..f4be4845 > > --- /dev/null > > +++ b/src/dio-append-buf-fault.c > > @@ -0,0 +1,131 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (c) 2024 SUSE Linux Products GmbH. All Rights Reserved. > > + */ > > + > > +/* > > + * Test a direct IO write in append mode with a buffer that was not faulted in > > + * (or just partially) before the write. > > + */ > > + > > +/* Get the O_DIRECT definition. */ > > +#ifndef _GNU_SOURCE > > +#define _GNU_SOURCE > > +#endif > > + > > +#include <stdio.h> > > +#include <stdlib.h> > > +#include <unistd.h> > > +#include <stdint.h> > > +#include <fcntl.h> > > +#include <errno.h> > > +#include <string.h> > > +#include <sys/mman.h> > > +#include <sys/stat.h> > > + > > +int main(int argc, char *argv[]) > > +{ > > + struct stat stbuf; > > + int fd; > > + long pagesize; > > + void *buf; > > + ssize_t ret; > > + > > + if (argc != 2) { > > + fprintf(stderr, "Use: %s <file path>\n", argv[0]); > > + return 1; > > + } > > + > > + /* > > + * First try an append write against an empty file of a buffer with a > > + * size matching the page size. The buffer is not faulted in before > > + * attempting the write. 
> > + */ > > + > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > + if (fd == -1) { > > + perror("Failed to open/create file"); > > + return 2; > > + } > > + > > + pagesize = sysconf(_SC_PAGE_SIZE); > > + if (pagesize == -1) { > > + perror("Failed to get page size"); > > + return 3; > > + } > > + > > + buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > + if (buf == MAP_FAILED) { > > + perror("Failed to allocate first buffer"); > > + return 4; > > + } > > + > > + ret = write(fd, buf, pagesize); > > + if (ret < 0) { > > + perror("First write failed"); > > + return 5; > > + } > > + > > + ret = fstat(fd, &stbuf); > > + if (ret < 0) { > > + perror("First stat failed"); > > + return 6; > > + } > > + > > + if (stbuf.st_size != pagesize) { > > + fprintf(stderr, > > + "Wrong file size after first write, got %jd expected %ld\n", > > + (intmax_t)stbuf.st_size, pagesize); > > + return 7; > > + } > > + > > + munmap(buf, pagesize); > > + close(fd); > > + > > + /* > > + * Now try an append write against an empty file of a buffer with a > > + * size matching twice the page size. Only the first page of the buffer > > + * is faulted in before attempting the write, so that the second page > > + * should be faulted in during the write. > > + */ > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > + if (fd == -1) { > > + perror("Failed to open/create file"); > > + return 8; > > + } > > + > > + buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > + if (buf == MAP_FAILED) { > > + perror("Failed to allocate second buffer"); > > + return 9; > > + } > > + > > + /* Fault in first page of the buffer before the write. */ > > + memset(buf, 0, 1); > > + > > + ret = write(fd, buf, pagesize * 2); > > + if (ret < 0) { > > + perror("Second write failed"); > > Hi Filipe, > > This patch looks good to me, just a question about this part. 
Is it possible > to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too? It is possible, if a short write happens. If that's the case, we detect the failure below when checking the file size with the stat call. > > > + return 10; > > + } > > + > > + ret = fstat(fd, &stbuf); > > + if (ret < 0) { > > + perror("Second stat failed"); > > + return 11; > > + } > > + > > + if (stbuf.st_size != pagesize * 2) { > > + fprintf(stderr, > > + "Wrong file size after second write, got %jd expected %ld\n", > > + (intmax_t)stbuf.st_size, pagesize * 2); > > Does this try to check the stbuf.st_size isn't equal to the write(2) return > value? Or checks stbuf.st_size != pagesize * 2, when the return value is > good (equal to pagesize * 2) ? It checks whether it is equal to pagesize * 2, which is supposed to be the final file size, meaning the write succeeded and wrote all the expected data (pagesize * 2). Thanks. > > Thanks, > Zorro > > > + return 12; > > + } > > + > > + munmap(buf, pagesize * 2); > > + close(fd); > > + > > + return 0; > > +} > > [snip] >
On Fri, Jul 26, 2024 at 07:12:34PM +0100, Filipe Manana wrote: > On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote: > > > > On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote: > > > From: Filipe Manana <fdmanana@suse.com> > > > > > > Test that doing a direct IO append write to a file when the input buffer > > > was not yet faulted in, does not result in an incorrect file size. > > > > > > This exercises a bug on btrfs reported by users and which is fixed by > > > the following kernel patch: > > > > > > "btrfs: fix corruption after buffer fault in during direct IO append write" > > > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > > --- > > > .gitignore | 1 + > > > src/Makefile | 2 +- > > > src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++ > > > tests/generic/362 | 28 ++++++++ > > > tests/generic/362.out | 2 + > > > 5 files changed, 163 insertions(+), 1 deletion(-) > > > create mode 100644 src/dio-append-buf-fault.c > > > create mode 100755 tests/generic/362 > > > create mode 100644 tests/generic/362.out > > > > > > diff --git a/.gitignore b/.gitignore > > > index b5f15162..97c7e001 100644 > > > --- a/.gitignore > > > +++ b/.gitignore > > > @@ -72,6 +72,7 @@ tags > > > /src/deduperace > > > /src/detached_mounts_propagation > > > /src/devzero > > > +/src/dio-append-buf-fault > > > /src/dio-buf-fault > > > /src/dio-interleaved > > > /src/dio-invalidate-cache > > > diff --git a/src/Makefile b/src/Makefile > > > index 99796137..559209be 100644 > > > --- a/src/Makefile > > > +++ b/src/Makefile > > > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > > t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ > > > t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \ > > > - readdir-while-renames > > > + readdir-while-renames dio-append-buf-fault > > > > > > LINUX_TARGETS = 
xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c > > > new file mode 100644 > > > index 00000000..f4be4845 > > > --- /dev/null > > > +++ b/src/dio-append-buf-fault.c > > > @@ -0,0 +1,131 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +/* > > > + * Copyright (c) 2024 SUSE Linux Products GmbH. All Rights Reserved. > > > + */ > > > + > > > +/* > > > + * Test a direct IO write in append mode with a buffer that was not faulted in > > > + * (or just partially) before the write. > > > + */ > > > + > > > +/* Get the O_DIRECT definition. */ > > > +#ifndef _GNU_SOURCE > > > +#define _GNU_SOURCE > > > +#endif > > > + > > > +#include <stdio.h> > > > +#include <stdlib.h> > > > +#include <unistd.h> > > > +#include <stdint.h> > > > +#include <fcntl.h> > > > +#include <errno.h> > > > +#include <string.h> > > > +#include <sys/mman.h> > > > +#include <sys/stat.h> > > > + > > > +int main(int argc, char *argv[]) > > > +{ > > > + struct stat stbuf; > > > + int fd; > > > + long pagesize; > > > + void *buf; > > > + ssize_t ret; > > > + > > > + if (argc != 2) { > > > + fprintf(stderr, "Use: %s <file path>\n", argv[0]); > > > + return 1; > > > + } > > > + > > > + /* > > > + * First try an append write against an empty file of a buffer with a > > > + * size matching the page size. The buffer is not faulted in before > > > + * attempting the write. 
> > > + */ > > > + > > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > > + if (fd == -1) { > > > + perror("Failed to open/create file"); > > > + return 2; > > > + } > > > + > > > + pagesize = sysconf(_SC_PAGE_SIZE); > > > + if (pagesize == -1) { > > > + perror("Failed to get page size"); > > > + return 3; > > > + } > > > + > > > + buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, > > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > > + if (buf == MAP_FAILED) { > > > + perror("Failed to allocate first buffer"); > > > + return 4; > > > + } > > > + > > > + ret = write(fd, buf, pagesize); > > > + if (ret < 0) { > > > + perror("First write failed"); > > > + return 5; > > > + } > > > + > > > + ret = fstat(fd, &stbuf); > > > + if (ret < 0) { > > > + perror("First stat failed"); > > > + return 6; > > > + } > > > + > > > + if (stbuf.st_size != pagesize) { > > > + fprintf(stderr, > > > + "Wrong file size after first write, got %jd expected %ld\n", > > > + (intmax_t)stbuf.st_size, pagesize); > > > + return 7; > > > + } > > > + > > > + munmap(buf, pagesize); > > > + close(fd); > > > + > > > + /* > > > + * Now try an append write against an empty file of a buffer with a > > > + * size matching twice the page size. Only the first page of the buffer > > > + * is faulted in before attempting the write, so that the second page > > > + * should be faulted in during the write. > > > + */ > > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > > + if (fd == -1) { > > > + perror("Failed to open/create file"); > > > + return 8; > > > + } > > > + > > > + buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE, > > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > > + if (buf == MAP_FAILED) { > > > + perror("Failed to allocate second buffer"); > > > + return 9; > > > + } > > > + > > > + /* Fault in first page of the buffer before the write. 
*/ > > > + memset(buf, 0, 1); > > > + > > > + ret = write(fd, buf, pagesize * 2); > > > + if (ret < 0) { > > > + perror("Second write failed"); > > > > Hi Filipe, > > > > This patch looks good to me, just a question about this part. Is it possible > > to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too? > > It is possible, if a short write happens. > If that's the case, we detect the failure below when checking the file > size with the stat call. > > > > > > + return 10; > > > + } > > > + > > > + ret = fstat(fd, &stbuf); > > > + if (ret < 0) { > > > + perror("Second stat failed"); > > > + return 11; > > > + } > > > + > > > + if (stbuf.st_size != pagesize * 2) { > > > + fprintf(stderr, > > > + "Wrong file size after second write, got %jd expected %ld\n", > > > + (intmax_t)stbuf.st_size, pagesize * 2); > > > > Does this try to check the stbuf.st_size isn't equal to the write(2) return > > value? Or checks stbuf.st_size != pagesize * 2, when the return value is > > good (equal to pagesize * 2) ? > > It checks if it is equals to pagesize * 2, which is supposed to be the > final file size, meaning the write succeeded and wrote all the > expected data (pagesize * 2). Thanks for your explanation. I noticed that the "Wrong file size after second write, got %jd expected %ld\n" line means the bug is triggered: # ./check -s default generic/362 SECTION -- default FSTYP -- btrfs PLATFORM -- Linux/x86_64 dell-xxxxx-xxx 6.10.0-0.rc7.20240712git43db1e03c086.62.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Jul 12 22:31:14 UTC 2024 MKFS_OPTIONS -- /dev/sda6 MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda6 /mnt/scratch generic/362 1s ... 
- output mismatch (see /root/git/xfstests/results//default/generic/362.out.bad) --- tests/generic/362.out 2024-07-27 01:38:47.810847933 +0800 +++ /root/git/xfstests/results//default/generic/362.out.bad 2024-07-27 01:41:50.126428012 +0800 @@ -1,2 +1,3 @@ QA output created by 362 +Wrong file size after first write, got 8192 expected 4096 Silence is golden ... (Run 'diff -u /root/git/xfstests/tests/generic/362.out /root/git/xfstests/results//default/generic/362.out.bad' to see the entire diff) HINT: You _MAY_ be missing kernel fix: xxxxxxxxxxxx btrfs: fix corruption after buffer fault in during direct IO append write Ran: generic/362 Failures: generic/362 Failed 1 of 1 tests I think a "short write" isn't a bug, just a rare test failure (or should we use a write loop to avoid that?). So we could first make sure that write() returns "pagesize * 2", and then check (stbuf.st_size != pagesize * 2) for the bug itself. What do you think? Thanks, Zorro > > Thanks. > > > > > > Thanks, > > Zorro > > > > > + return 12; > > > + } > > > + > > > + munmap(buf, pagesize * 2); > > > + close(fd); > > > + > > > + return 0; > > > +} > > > > [snip] > > >
On Sat, Jul 27, 2024 at 9:27 AM Zorro Lang <zlang@redhat.com> wrote: > > On Fri, Jul 26, 2024 at 07:12:34PM +0100, Filipe Manana wrote: > > On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote: > > > > > > On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote: > > > > From: Filipe Manana <fdmanana@suse.com> > > > > > > > > Test that doing a direct IO append write to a file when the input buffer > > > > was not yet faulted in, does not result in an incorrect file size. > > > > > > > > This exercises a bug on btrfs reported by users and which is fixed by > > > > the following kernel patch: > > > > > > > > "btrfs: fix corruption after buffer fault in during direct IO append write" > > > > > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > > > --- > > > > .gitignore | 1 + > > > > src/Makefile | 2 +- > > > > src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++ > > > > tests/generic/362 | 28 ++++++++ > > > > tests/generic/362.out | 2 + > > > > 5 files changed, 163 insertions(+), 1 deletion(-) > > > > create mode 100644 src/dio-append-buf-fault.c > > > > create mode 100755 tests/generic/362 > > > > create mode 100644 tests/generic/362.out > > > > > > > > diff --git a/.gitignore b/.gitignore > > > > index b5f15162..97c7e001 100644 > > > > --- a/.gitignore > > > > +++ b/.gitignore > > > > @@ -72,6 +72,7 @@ tags > > > > /src/deduperace > > > > /src/detached_mounts_propagation > > > > /src/devzero > > > > +/src/dio-append-buf-fault > > > > /src/dio-buf-fault > > > > /src/dio-interleaved > > > > /src/dio-invalidate-cache > > > > diff --git a/src/Makefile b/src/Makefile > > > > index 99796137..559209be 100644 > > > > --- a/src/Makefile > > > > +++ b/src/Makefile > > > > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ > > > > t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ > > > > t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ > > > > 
t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \ > > > > - readdir-while-renames > > > > + readdir-while-renames dio-append-buf-fault > > > > > > > > LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ > > > > preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ > > > > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c > > > > new file mode 100644 > > > > index 00000000..f4be4845 > > > > --- /dev/null > > > > +++ b/src/dio-append-buf-fault.c > > > > @@ -0,0 +1,131 @@ > > > > +// SPDX-License-Identifier: GPL-2.0 > > > > +/* > > > > + * Copyright (c) 2024 SUSE Linux Products GmbH. All Rights Reserved. > > > > + */ > > > > + > > > > +/* > > > > + * Test a direct IO write in append mode with a buffer that was not faulted in > > > > + * (or just partially) before the write. > > > > + */ > > > > + > > > > +/* Get the O_DIRECT definition. */ > > > > +#ifndef _GNU_SOURCE > > > > +#define _GNU_SOURCE > > > > +#endif > > > > + > > > > +#include <stdio.h> > > > > +#include <stdlib.h> > > > > +#include <unistd.h> > > > > +#include <stdint.h> > > > > +#include <fcntl.h> > > > > +#include <errno.h> > > > > +#include <string.h> > > > > +#include <sys/mman.h> > > > > +#include <sys/stat.h> > > > > + > > > > +int main(int argc, char *argv[]) > > > > +{ > > > > + struct stat stbuf; > > > > + int fd; > > > > + long pagesize; > > > > + void *buf; > > > > + ssize_t ret; > > > > + > > > > + if (argc != 2) { > > > > + fprintf(stderr, "Use: %s <file path>\n", argv[0]); > > > > + return 1; > > > > + } > > > > + > > > > + /* > > > > + * First try an append write against an empty file of a buffer with a > > > > + * size matching the page size. The buffer is not faulted in before > > > > + * attempting the write. 
> > > > + */ > > > > + > > > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > > > + if (fd == -1) { > > > > + perror("Failed to open/create file"); > > > > + return 2; > > > > + } > > > > + > > > > + pagesize = sysconf(_SC_PAGE_SIZE); > > > > + if (pagesize == -1) { > > > > + perror("Failed to get page size"); > > > > + return 3; > > > > + } > > > > + > > > > + buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, > > > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > > > + if (buf == MAP_FAILED) { > > > > + perror("Failed to allocate first buffer"); > > > > + return 4; > > > > + } > > > > + > > > > + ret = write(fd, buf, pagesize); > > > > + if (ret < 0) { > > > > + perror("First write failed"); > > > > + return 5; > > > > + } > > > > + > > > > + ret = fstat(fd, &stbuf); > > > > + if (ret < 0) { > > > > + perror("First stat failed"); > > > > + return 6; > > > > + } > > > > + > > > > + if (stbuf.st_size != pagesize) { > > > > + fprintf(stderr, > > > > + "Wrong file size after first write, got %jd expected %ld\n", > > > > + (intmax_t)stbuf.st_size, pagesize); > > > > + return 7; > > > > + } > > > > + > > > > + munmap(buf, pagesize); > > > > + close(fd); > > > > + > > > > + /* > > > > + * Now try an append write against an empty file of a buffer with a > > > > + * size matching twice the page size. Only the first page of the buffer > > > > + * is faulted in before attempting the write, so that the second page > > > > + * should be faulted in during the write. 
> > > > + */ > > > > + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); > > > > + if (fd == -1) { > > > > + perror("Failed to open/create file"); > > > > + return 8; > > > > + } > > > > + > > > > + buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE, > > > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > > > > + if (buf == MAP_FAILED) { > > > > + perror("Failed to allocate second buffer"); > > > > + return 9; > > > > + } > > > > + > > > > + /* Fault in first page of the buffer before the write. */ > > > > + memset(buf, 0, 1); > > > > + > > > > + ret = write(fd, buf, pagesize * 2); > > > > + if (ret < 0) { > > > > + perror("Second write failed"); > > > > > > Hi Filipe, > > > > > > This patch looks good to me, just a question about this part. Is it possible > > > to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too? > > > > It is possible, if a short write happens. > > If that's the case, we detect the failure below when checking the file > > size with the stat call. > > > > > > > > > + return 10; > > > > + } > > > > + > > > > + ret = fstat(fd, &stbuf); > > > > + if (ret < 0) { > > > > + perror("Second stat failed"); > > > > + return 11; > > > > + } > > > > + > > > > + if (stbuf.st_size != pagesize * 2) { > > > > + fprintf(stderr, > > > > + "Wrong file size after second write, got %jd expected %ld\n", > > > > + (intmax_t)stbuf.st_size, pagesize * 2); > > > > > > Does this try to check the stbuf.st_size isn't equal to the write(2) return > > > value? Or checks stbuf.st_size != pagesize * 2, when the return value is > > > good (equal to pagesize * 2) ? > > > > It checks if it is equals to pagesize * 2, which is supposed to be the > > final file size, meaning the write succeeded and wrote all the > > expected data (pagesize * 2). > > Thanks for your explanation. > > I noticed that the "Wrong file size after second write, got %jd expected %ld\n" > line means the bug is triggered: Correct. 
> > # ./check -s default generic/362 > SECTION -- default > FSTYP -- btrfs > PLATFORM -- Linux/x86_64 dell-xxxxx-xxx 6.10.0-0.rc7.20240712git43db1e03c086.62.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Jul 12 22:31:14 UTC 2024 > MKFS_OPTIONS -- /dev/sda6 > MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda6 /mnt/scratch > > generic/362 1s ... - output mismatch (see /root/git/xfstests/results//default/generic/362.out.bad) > --- tests/generic/362.out 2024-07-27 01:38:47.810847933 +0800 > +++ /root/git/xfstests/results//default/generic/362.out.bad 2024-07-27 01:41:50.126428012 +0800 > @@ -1,2 +1,3 @@ > QA output created by 362 > +Wrong file size after first write, got 8192 expected 4096 Yes, that's expected with unpatched btrfs. > Silence is golden > ... > (Run 'diff -u /root/git/xfstests/tests/generic/362.out /root/git/xfstests/results//default/generic/362.out.bad' to see the entire diff) > > HINT: You _MAY_ be missing kernel fix: > xxxxxxxxxxxx btrfs: fix corruption after buffer fault in during direct IO append write > > Ran: generic/362 > Failures: generic/362 > Failed 1 of 1 tests > > I thought a "short write" isn't a bug, just a rare test failure (or we use a loop > write to avoid that?). So we might can make sure the write() returns "pagesize * 2" > at first, then check (stbuf.st_size != pagesize * 2) for the bug itself. > > What do you think? Fine ok. I've just sent a v2 with that change plus a minor one to always let the second test run even if the first one fails: https://lore.kernel.org/linux-btrfs/6c52fe9ce75354a931afdc6d2f7fb638c7f06b00.1722079321.git.fdmanana@suse.com/ Thanks. > > Thanks, > Zorro > > > > > Thanks. > > > > > > > > > > Thanks, > > > Zorro > > > > > > > + return 12; > > > > + } > > > > + > > > > + munmap(buf, pagesize * 2); > > > > + close(fd); > > > > + > > > > + return 0; > > > > +} > > > > > > [snip] > > > > > >
diff --git a/.gitignore b/.gitignore index b5f15162..97c7e001 100644 --- a/.gitignore +++ b/.gitignore @@ -72,6 +72,7 @@ tags /src/deduperace /src/detached_mounts_propagation /src/devzero +/src/dio-append-buf-fault /src/dio-buf-fault /src/dio-interleaved /src/dio-invalidate-cache diff --git a/src/Makefile b/src/Makefile index 99796137..559209be 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \ t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \ t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \ t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \ - readdir-while-renames + readdir-while-renames dio-append-buf-fault LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \ diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c new file mode 100644 index 00000000..f4be4845 --- /dev/null +++ b/src/dio-append-buf-fault.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 SUSE Linux Products GmbH. All Rights Reserved. + */ + +/* + * Test a direct IO write in append mode with a buffer that was not faulted in + * (or just partially) before the write. + */ + +/* Get the O_DIRECT definition. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> + +int main(int argc, char *argv[]) +{ + struct stat stbuf; + int fd; + long pagesize; + void *buf; + ssize_t ret; + + if (argc != 2) { + fprintf(stderr, "Use: %s <file path>\n", argv[0]); + return 1; + } + + /* + * First try an append write against an empty file of a buffer with a + * size matching the page size. The buffer is not faulted in before + * attempting the write. 
+ */ + + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); + if (fd == -1) { + perror("Failed to open/create file"); + return 2; + } + + pagesize = sysconf(_SC_PAGE_SIZE); + if (pagesize == -1) { + perror("Failed to get page size"); + return 3; + } + + buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) { + perror("Failed to allocate first buffer"); + return 4; + } + + ret = write(fd, buf, pagesize); + if (ret < 0) { + perror("First write failed"); + return 5; + } + + ret = fstat(fd, &stbuf); + if (ret < 0) { + perror("First stat failed"); + return 6; + } + + if (stbuf.st_size != pagesize) { + fprintf(stderr, + "Wrong file size after first write, got %jd expected %ld\n", + (intmax_t)stbuf.st_size, pagesize); + return 7; + } + + munmap(buf, pagesize); + close(fd); + + /* + * Now try an append write against an empty file of a buffer with a + * size matching twice the page size. Only the first page of the buffer + * is faulted in before attempting the write, so that the second page + * should be faulted in during the write. + */ + fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666); + if (fd == -1) { + perror("Failed to open/create file"); + return 8; + } + + buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) { + perror("Failed to allocate second buffer"); + return 9; + } + + /* Fault in first page of the buffer before the write. 
*/ + memset(buf, 0, 1); + + ret = write(fd, buf, pagesize * 2); + if (ret < 0) { + perror("Second write failed"); + return 10; + } + + ret = fstat(fd, &stbuf); + if (ret < 0) { + perror("Second stat failed"); + return 11; + } + + if (stbuf.st_size != pagesize * 2) { + fprintf(stderr, + "Wrong file size after second write, got %jd expected %ld\n", + (intmax_t)stbuf.st_size, pagesize * 2); + return 12; + } + + munmap(buf, pagesize * 2); + close(fd); + + return 0; +} diff --git a/tests/generic/362 b/tests/generic/362 new file mode 100755 index 00000000..2c127347 --- /dev/null +++ b/tests/generic/362 @@ -0,0 +1,28 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2024 SUSE Linux Products GmbH. All Rights Reserved. +# +# FS QA Test 362 +# +# Test that doing a direct IO append write to a file when the input buffer was +# not yet faulted in, does not result in an incorrect file size. +# +. ./common/preamble +_begin_fstest auto quick + +_require_test +_require_odirect +_require_test_program dio-append-buf-fault + +[ $FSTYP == "btrfs" ] && \ + _fixed_by_kernel_commit xxxxxxxxxxxx \ + "btrfs: fix corruption after buffer fault in during direct IO append write" + +# On error the test program writes messages to stderr, causing a golden output +# mismatch and making the test fail. +$here/src/dio-append-buf-fault $TEST_DIR/dio-append-buf-fault + +# success, all done +echo "Silence is golden" +status=0 +exit diff --git a/tests/generic/362.out b/tests/generic/362.out new file mode 100644 index 00000000..0ff40905 --- /dev/null +++ b/tests/generic/362.out @@ -0,0 +1,2 @@ +QA output created by 362 +Silence is golden