diff mbox series

generic: test page fault during direct IO write with O_APPEND

Message ID 652ec55049e94a59f66f4112fb8707629db3001d.1722008942.git.fdmanana@suse.com (mailing list archive)
State New, archived
Headers show
Series generic: test page fault during direct IO write with O_APPEND | expand

Commit Message

Filipe Manana July 26, 2024, 3:55 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Test that doing a direct IO append write to a file when the input buffer
was not yet faulted in, does not result in an incorrect file size.

This exercises a bug on btrfs reported by users and which is fixed by
the following kernel patch:

   "btrfs: fix corruption after buffer fault in during direct IO append write"

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 .gitignore                 |   1 +
 src/Makefile               |   2 +-
 src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++
 tests/generic/362          |  28 ++++++++
 tests/generic/362.out      |   2 +
 5 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 src/dio-append-buf-fault.c
 create mode 100755 tests/generic/362
 create mode 100644 tests/generic/362.out

Comments

Zorro Lang July 26, 2024, 5:58 p.m. UTC | #1
On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> Test that doing a direct IO append write to a file when the input buffer
> was not yet faulted in, does not result in an incorrect file size.
> 
> This exercises a bug on btrfs reported by users and which is fixed by
> the following kernel patch:
> 
>    "btrfs: fix corruption after buffer fault in during direct IO append write"
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>  .gitignore                 |   1 +
>  src/Makefile               |   2 +-
>  src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++
>  tests/generic/362          |  28 ++++++++
>  tests/generic/362.out      |   2 +
>  5 files changed, 163 insertions(+), 1 deletion(-)
>  create mode 100644 src/dio-append-buf-fault.c
>  create mode 100755 tests/generic/362
>  create mode 100644 tests/generic/362.out
> 
> diff --git a/.gitignore b/.gitignore
> index b5f15162..97c7e001 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -72,6 +72,7 @@ tags
>  /src/deduperace
>  /src/detached_mounts_propagation
>  /src/devzero
> +/src/dio-append-buf-fault
>  /src/dio-buf-fault
>  /src/dio-interleaved
>  /src/dio-invalidate-cache
> diff --git a/src/Makefile b/src/Makefile
> index 99796137..559209be 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
>  	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
>  	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
>  	t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
> -	readdir-while-renames
> +	readdir-while-renames dio-append-buf-fault
>  
>  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>  	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c
> new file mode 100644
> index 00000000..f4be4845
> --- /dev/null
> +++ b/src/dio-append-buf-fault.c
> @@ -0,0 +1,131 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
> + */
> +
> +/*
> + * Test a direct IO write in append mode with a buffer that was not faulted in
> + * (or just partially) before the write.
> + */
> +
> +/* Get the O_DIRECT definition. */
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +
> +int main(int argc, char *argv[])
> +{
> +	struct stat stbuf;
> +	int fd;
> +	long pagesize;
> +	void *buf;
> +	ssize_t ret;
> +
> +	if (argc != 2) {
> +		fprintf(stderr, "Use: %s <file path>\n", argv[0]);
> +		return 1;
> +	}
> +
> +	/*
> +	 * First try an append write against an empty file of a buffer with a
> +	 * size matching the page size. The buffer is not faulted in before
> +	 * attempting the write.
> +	 */
> +
> +	fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> +	if (fd == -1) {
> +		perror("Failed to open/create file");
> +		return 2;
> +	}
> +
> +	pagesize = sysconf(_SC_PAGE_SIZE);
> +	if (pagesize == -1) {
> +		perror("Failed to get page size");
> +		return 3;
> +	}
> +
> +	buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
> +		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	if (buf == MAP_FAILED) {
> +		perror("Failed to allocate first buffer");
> +		return 4;
> +	}
> +
> +	ret = write(fd, buf, pagesize);
> +	if (ret < 0) {
> +		perror("First write failed");
> +		return 5;
> +	}
> +
> +	ret = fstat(fd, &stbuf);
> +	if (ret < 0) {
> +		perror("First stat failed");
> +		return 6;
> +	}
> +
> +	if (stbuf.st_size != pagesize) {
> +		fprintf(stderr,
> +			"Wrong file size after first write, got %jd expected %ld\n",
> +			(intmax_t)stbuf.st_size, pagesize);
> +		return 7;
> +	}
> +
> +	munmap(buf, pagesize);
> +	close(fd);
> +
> +	/*
> +	 * Now try an append write against an empty file of a buffer with a
> +	 * size matching twice the page size. Only the first page of the buffer
> +	 * is faulted in before attempting the write, so that the second page
> +	 * should be faulted in during the write.
> +	 */
> +	fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> +	if (fd == -1) {
> +		perror("Failed to open/create file");
> +		return 8;
> +	}
> +
> +	buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE,
> +		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	if (buf == MAP_FAILED) {
> +		perror("Failed to allocate second buffer");
> +		return 9;
> +	}
> +
> +	/* Fault in first page of the buffer before the write. */
> +	memset(buf, 0, 1);
> +
> +	ret = write(fd, buf, pagesize * 2);
> +	if (ret < 0) {
> +		perror("Second write failed");

Hi Filipe,

This patch looks good to me, just a question about this part. Is it possible
to get (0 < ret < pagesize * 2) here? If so, should we report a failure too?

> +		return 10;
> +	}
> +
> +	ret = fstat(fd, &stbuf);
> +	if (ret < 0) {
> +		perror("Second stat failed");
> +		return 11;
> +	}
> +
> +	if (stbuf.st_size != pagesize * 2) {
> +		fprintf(stderr,
> +			"Wrong file size after second write, got %jd expected %ld\n",
> +			(intmax_t)stbuf.st_size, pagesize * 2);

Is this trying to check that stbuf.st_size isn't equal to the write(2) return
value? Or does it check stbuf.st_size != pagesize * 2 when the return value is
good (equal to pagesize * 2)?

Thanks,
Zorro

> +		return 12;
> +	}
> +
> +	munmap(buf, pagesize * 2);
> +	close(fd);
> +
> +	return 0;
> +}

[snip]
Filipe Manana July 26, 2024, 6:12 p.m. UTC | #2
On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote:
>
> On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > Test that doing a direct IO append write to a file when the input buffer
> > was not yet faulted in, does not result in an incorrect file size.
> >
> > This exercises a bug on btrfs reported by users and which is fixed by
> > the following kernel patch:
> >
> >    "btrfs: fix corruption after buffer fault in during direct IO append write"
> >
> > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > ---
> >  .gitignore                 |   1 +
> >  src/Makefile               |   2 +-
> >  src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++
> >  tests/generic/362          |  28 ++++++++
> >  tests/generic/362.out      |   2 +
> >  5 files changed, 163 insertions(+), 1 deletion(-)
> >  create mode 100644 src/dio-append-buf-fault.c
> >  create mode 100755 tests/generic/362
> >  create mode 100644 tests/generic/362.out
> >
> > diff --git a/.gitignore b/.gitignore
> > index b5f15162..97c7e001 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -72,6 +72,7 @@ tags
> >  /src/deduperace
> >  /src/detached_mounts_propagation
> >  /src/devzero
> > +/src/dio-append-buf-fault
> >  /src/dio-buf-fault
> >  /src/dio-interleaved
> >  /src/dio-invalidate-cache
> > diff --git a/src/Makefile b/src/Makefile
> > index 99796137..559209be 100644
> > --- a/src/Makefile
> > +++ b/src/Makefile
> > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
> >       t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
> >       t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
> >       t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
> > -     readdir-while-renames
> > +     readdir-while-renames dio-append-buf-fault
> >
> >  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> >       preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c
> > new file mode 100644
> > index 00000000..f4be4845
> > --- /dev/null
> > +++ b/src/dio-append-buf-fault.c
> > @@ -0,0 +1,131 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
> > + */
> > +
> > +/*
> > + * Test a direct IO write in append mode with a buffer that was not faulted in
> > + * (or just partially) before the write.
> > + */
> > +
> > +/* Get the O_DIRECT definition. */
> > +#ifndef _GNU_SOURCE
> > +#define _GNU_SOURCE
> > +#endif
> > +
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <unistd.h>
> > +#include <stdint.h>
> > +#include <fcntl.h>
> > +#include <errno.h>
> > +#include <string.h>
> > +#include <sys/mman.h>
> > +#include <sys/stat.h>
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +     struct stat stbuf;
> > +     int fd;
> > +     long pagesize;
> > +     void *buf;
> > +     ssize_t ret;
> > +
> > +     if (argc != 2) {
> > +             fprintf(stderr, "Use: %s <file path>\n", argv[0]);
> > +             return 1;
> > +     }
> > +
> > +     /*
> > +      * First try an append write against an empty file of a buffer with a
> > +      * size matching the page size. The buffer is not faulted in before
> > +      * attempting the write.
> > +      */
> > +
> > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > +     if (fd == -1) {
> > +             perror("Failed to open/create file");
> > +             return 2;
> > +     }
> > +
> > +     pagesize = sysconf(_SC_PAGE_SIZE);
> > +     if (pagesize == -1) {
> > +             perror("Failed to get page size");
> > +             return 3;
> > +     }
> > +
> > +     buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
> > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > +     if (buf == MAP_FAILED) {
> > +             perror("Failed to allocate first buffer");
> > +             return 4;
> > +     }
> > +
> > +     ret = write(fd, buf, pagesize);
> > +     if (ret < 0) {
> > +             perror("First write failed");
> > +             return 5;
> > +     }
> > +
> > +     ret = fstat(fd, &stbuf);
> > +     if (ret < 0) {
> > +             perror("First stat failed");
> > +             return 6;
> > +     }
> > +
> > +     if (stbuf.st_size != pagesize) {
> > +             fprintf(stderr,
> > +                     "Wrong file size after first write, got %jd expected %ld\n",
> > +                     (intmax_t)stbuf.st_size, pagesize);
> > +             return 7;
> > +     }
> > +
> > +     munmap(buf, pagesize);
> > +     close(fd);
> > +
> > +     /*
> > +      * Now try an append write against an empty file of a buffer with a
> > +      * size matching twice the page size. Only the first page of the buffer
> > +      * is faulted in before attempting the write, so that the second page
> > +      * should be faulted in during the write.
> > +      */
> > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > +     if (fd == -1) {
> > +             perror("Failed to open/create file");
> > +             return 8;
> > +     }
> > +
> > +     buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE,
> > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > +     if (buf == MAP_FAILED) {
> > +             perror("Failed to allocate second buffer");
> > +             return 9;
> > +     }
> > +
> > +     /* Fault in first page of the buffer before the write. */
> > +     memset(buf, 0, 1);
> > +
> > +     ret = write(fd, buf, pagesize * 2);
> > +     if (ret < 0) {
> > +             perror("Second write failed");
>
> Hi Filipe,
>
> This patch looks good to me, just a question about this part. Is it possible
> to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too?

It is possible, if a short write happens.
If that's the case, we detect the failure below when checking the file
size with the stat call.

>
> > +             return 10;
> > +     }
> > +
> > +     ret = fstat(fd, &stbuf);
> > +     if (ret < 0) {
> > +             perror("Second stat failed");
> > +             return 11;
> > +     }
> > +
> > +     if (stbuf.st_size != pagesize * 2) {
> > +             fprintf(stderr,
> > +                     "Wrong file size after second write, got %jd expected %ld\n",
> > +                     (intmax_t)stbuf.st_size, pagesize * 2);
>
> Does this try to check the stbuf.st_size isn't equal to the write(2) return
> value? Or checks stbuf.st_size != pagesize * 2, when the return value is
> good (equal to pagesize * 2) ?

It checks whether it is equal to pagesize * 2, which is supposed to be the
final file size, meaning the write succeeded and wrote all the
expected data (pagesize * 2).

Thanks.


>
> Thanks,
> Zorro
>
> > +             return 12;
> > +     }
> > +
> > +     munmap(buf, pagesize * 2);
> > +     close(fd);
> > +
> > +     return 0;
> > +}
>
> [snip]
>
Zorro Lang July 27, 2024, 8:27 a.m. UTC | #3
On Fri, Jul 26, 2024 at 07:12:34PM +0100, Filipe Manana wrote:
> On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote:
> >
> > On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote:
> > > From: Filipe Manana <fdmanana@suse.com>
> > >
> > > Test that doing a direct IO append write to a file when the input buffer
> > > was not yet faulted in, does not result in an incorrect file size.
> > >
> > > This exercises a bug on btrfs reported by users and which is fixed by
> > > the following kernel patch:
> > >
> > >    "btrfs: fix corruption after buffer fault in during direct IO append write"
> > >
> > > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > > ---
> > >  .gitignore                 |   1 +
> > >  src/Makefile               |   2 +-
> > >  src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++
> > >  tests/generic/362          |  28 ++++++++
> > >  tests/generic/362.out      |   2 +
> > >  5 files changed, 163 insertions(+), 1 deletion(-)
> > >  create mode 100644 src/dio-append-buf-fault.c
> > >  create mode 100755 tests/generic/362
> > >  create mode 100644 tests/generic/362.out
> > >
> > > diff --git a/.gitignore b/.gitignore
> > > index b5f15162..97c7e001 100644
> > > --- a/.gitignore
> > > +++ b/.gitignore
> > > @@ -72,6 +72,7 @@ tags
> > >  /src/deduperace
> > >  /src/detached_mounts_propagation
> > >  /src/devzero
> > > +/src/dio-append-buf-fault
> > >  /src/dio-buf-fault
> > >  /src/dio-interleaved
> > >  /src/dio-invalidate-cache
> > > diff --git a/src/Makefile b/src/Makefile
> > > index 99796137..559209be 100644
> > > --- a/src/Makefile
> > > +++ b/src/Makefile
> > > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
> > >       t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
> > >       t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
> > >       t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
> > > -     readdir-while-renames
> > > +     readdir-while-renames dio-append-buf-fault
> > >
> > >  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> > >       preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> > > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c
> > > new file mode 100644
> > > index 00000000..f4be4845
> > > --- /dev/null
> > > +++ b/src/dio-append-buf-fault.c
> > > @@ -0,0 +1,131 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
> > > + */
> > > +
> > > +/*
> > > + * Test a direct IO write in append mode with a buffer that was not faulted in
> > > + * (or just partially) before the write.
> > > + */
> > > +
> > > +/* Get the O_DIRECT definition. */
> > > +#ifndef _GNU_SOURCE
> > > +#define _GNU_SOURCE
> > > +#endif
> > > +
> > > +#include <stdio.h>
> > > +#include <stdlib.h>
> > > +#include <unistd.h>
> > > +#include <stdint.h>
> > > +#include <fcntl.h>
> > > +#include <errno.h>
> > > +#include <string.h>
> > > +#include <sys/mman.h>
> > > +#include <sys/stat.h>
> > > +
> > > +int main(int argc, char *argv[])
> > > +{
> > > +     struct stat stbuf;
> > > +     int fd;
> > > +     long pagesize;
> > > +     void *buf;
> > > +     ssize_t ret;
> > > +
> > > +     if (argc != 2) {
> > > +             fprintf(stderr, "Use: %s <file path>\n", argv[0]);
> > > +             return 1;
> > > +     }
> > > +
> > > +     /*
> > > +      * First try an append write against an empty file of a buffer with a
> > > +      * size matching the page size. The buffer is not faulted in before
> > > +      * attempting the write.
> > > +      */
> > > +
> > > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > > +     if (fd == -1) {
> > > +             perror("Failed to open/create file");
> > > +             return 2;
> > > +     }
> > > +
> > > +     pagesize = sysconf(_SC_PAGE_SIZE);
> > > +     if (pagesize == -1) {
> > > +             perror("Failed to get page size");
> > > +             return 3;
> > > +     }
> > > +
> > > +     buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
> > > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > > +     if (buf == MAP_FAILED) {
> > > +             perror("Failed to allocate first buffer");
> > > +             return 4;
> > > +     }
> > > +
> > > +     ret = write(fd, buf, pagesize);
> > > +     if (ret < 0) {
> > > +             perror("First write failed");
> > > +             return 5;
> > > +     }
> > > +
> > > +     ret = fstat(fd, &stbuf);
> > > +     if (ret < 0) {
> > > +             perror("First stat failed");
> > > +             return 6;
> > > +     }
> > > +
> > > +     if (stbuf.st_size != pagesize) {
> > > +             fprintf(stderr,
> > > +                     "Wrong file size after first write, got %jd expected %ld\n",
> > > +                     (intmax_t)stbuf.st_size, pagesize);
> > > +             return 7;
> > > +     }
> > > +
> > > +     munmap(buf, pagesize);
> > > +     close(fd);
> > > +
> > > +     /*
> > > +      * Now try an append write against an empty file of a buffer with a
> > > +      * size matching twice the page size. Only the first page of the buffer
> > > +      * is faulted in before attempting the write, so that the second page
> > > +      * should be faulted in during the write.
> > > +      */
> > > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > > +     if (fd == -1) {
> > > +             perror("Failed to open/create file");
> > > +             return 8;
> > > +     }
> > > +
> > > +     buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE,
> > > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > > +     if (buf == MAP_FAILED) {
> > > +             perror("Failed to allocate second buffer");
> > > +             return 9;
> > > +     }
> > > +
> > > +     /* Fault in first page of the buffer before the write. */
> > > +     memset(buf, 0, 1);
> > > +
> > > +     ret = write(fd, buf, pagesize * 2);
> > > +     if (ret < 0) {
> > > +             perror("Second write failed");
> >
> > Hi Filipe,
> >
> > This patch looks good to me, just a question about this part. Is it possible
> > to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too?
> 
> It is possible, if a short write happens.
> If that's the case, we detect the failure below when checking the file
> size with the stat call.
> 
> >
> > > +             return 10;
> > > +     }
> > > +
> > > +     ret = fstat(fd, &stbuf);
> > > +     if (ret < 0) {
> > > +             perror("Second stat failed");
> > > +             return 11;
> > > +     }
> > > +
> > > +     if (stbuf.st_size != pagesize * 2) {
> > > +             fprintf(stderr,
> > > +                     "Wrong file size after second write, got %jd expected %ld\n",
> > > +                     (intmax_t)stbuf.st_size, pagesize * 2);
> >
> > Does this try to check the stbuf.st_size isn't equal to the write(2) return
> > value? Or checks stbuf.st_size != pagesize * 2, when the return value is
> > good (equal to pagesize * 2) ?
> 
> It checks if it is equals to pagesize * 2, which is supposed to be the
> final file size, meaning the write succeeded and wrote all the
> expected data (pagesize * 2).

Thanks for your explanation.

I noticed that the "Wrong file size after second write, got %jd expected %ld\n"
line means the bug is triggered:

  # ./check -s default generic/362
  SECTION       -- default
  FSTYP         -- btrfs
  PLATFORM      -- Linux/x86_64 dell-xxxxx-xxx 6.10.0-0.rc7.20240712git43db1e03c086.62.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Jul 12 22:31:14 UTC 2024
  MKFS_OPTIONS  -- /dev/sda6
  MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda6 /mnt/scratch

  generic/362 1s ... - output mismatch (see /root/git/xfstests/results//default/generic/362.out.bad)
      --- tests/generic/362.out   2024-07-27 01:38:47.810847933 +0800
      +++ /root/git/xfstests/results//default/generic/362.out.bad 2024-07-27 01:41:50.126428012 +0800
      @@ -1,2 +1,3 @@
       QA output created by 362
      +Wrong file size after first write, got 8192 expected 4096
       Silence is golden
      ...
      (Run 'diff -u /root/git/xfstests/tests/generic/362.out /root/git/xfstests/results//default/generic/362.out.bad'  to see the entire diff)

  HINT: You _MAY_ be missing kernel fix:
        xxxxxxxxxxxx btrfs: fix corruption after buffer fault in during direct IO append write

  Ran: generic/362
  Failures: generic/362
  Failed 1 of 1 tests

I thought a "short write" isn't a bug, just a rare test failure (or should we use
a write loop to avoid that?). So maybe we can first make sure the write() returns
"pagesize * 2", then check (stbuf.st_size != pagesize * 2) for the bug itself.

What do you think?

Thanks,
Zorro

> 
> Thanks.
> 
> 
> >
> > Thanks,
> > Zorro
> >
> > > +             return 12;
> > > +     }
> > > +
> > > +     munmap(buf, pagesize * 2);
> > > +     close(fd);
> > > +
> > > +     return 0;
> > > +}
> >
> > [snip]
> >
>
Filipe Manana July 27, 2024, 11:30 a.m. UTC | #4
On Sat, Jul 27, 2024 at 9:27 AM Zorro Lang <zlang@redhat.com> wrote:
>
> On Fri, Jul 26, 2024 at 07:12:34PM +0100, Filipe Manana wrote:
> > On Fri, Jul 26, 2024 at 6:58 PM Zorro Lang <zlang@redhat.com> wrote:
> > >
> > > On Fri, Jul 26, 2024 at 04:55:46PM +0100, fdmanana@kernel.org wrote:
> > > > From: Filipe Manana <fdmanana@suse.com>
> > > >
> > > > Test that doing a direct IO append write to a file when the input buffer
> > > > was not yet faulted in, does not result in an incorrect file size.
> > > >
> > > > This exercises a bug on btrfs reported by users and which is fixed by
> > > > the following kernel patch:
> > > >
> > > >    "btrfs: fix corruption after buffer fault in during direct IO append write"
> > > >
> > > > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > > > ---
> > > >  .gitignore                 |   1 +
> > > >  src/Makefile               |   2 +-
> > > >  src/dio-append-buf-fault.c | 131 +++++++++++++++++++++++++++++++++++++
> > > >  tests/generic/362          |  28 ++++++++
> > > >  tests/generic/362.out      |   2 +
> > > >  5 files changed, 163 insertions(+), 1 deletion(-)
> > > >  create mode 100644 src/dio-append-buf-fault.c
> > > >  create mode 100755 tests/generic/362
> > > >  create mode 100644 tests/generic/362.out
> > > >
> > > > diff --git a/.gitignore b/.gitignore
> > > > index b5f15162..97c7e001 100644
> > > > --- a/.gitignore
> > > > +++ b/.gitignore
> > > > @@ -72,6 +72,7 @@ tags
> > > >  /src/deduperace
> > > >  /src/detached_mounts_propagation
> > > >  /src/devzero
> > > > +/src/dio-append-buf-fault
> > > >  /src/dio-buf-fault
> > > >  /src/dio-interleaved
> > > >  /src/dio-invalidate-cache
> > > > diff --git a/src/Makefile b/src/Makefile
> > > > index 99796137..559209be 100644
> > > > --- a/src/Makefile
> > > > +++ b/src/Makefile
> > > > @@ -20,7 +20,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
> > > >       t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
> > > >       t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
> > > >       t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
> > > > -     readdir-while-renames
> > > > +     readdir-while-renames dio-append-buf-fault
> > > >
> > > >  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> > > >       preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> > > > diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c
> > > > new file mode 100644
> > > > index 00000000..f4be4845
> > > > --- /dev/null
> > > > +++ b/src/dio-append-buf-fault.c
> > > > @@ -0,0 +1,131 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/*
> > > > + * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
> > > > + */
> > > > +
> > > > +/*
> > > > + * Test a direct IO write in append mode with a buffer that was not faulted in
> > > > + * (or just partially) before the write.
> > > > + */
> > > > +
> > > > +/* Get the O_DIRECT definition. */
> > > > +#ifndef _GNU_SOURCE
> > > > +#define _GNU_SOURCE
> > > > +#endif
> > > > +
> > > > +#include <stdio.h>
> > > > +#include <stdlib.h>
> > > > +#include <unistd.h>
> > > > +#include <stdint.h>
> > > > +#include <fcntl.h>
> > > > +#include <errno.h>
> > > > +#include <string.h>
> > > > +#include <sys/mman.h>
> > > > +#include <sys/stat.h>
> > > > +
> > > > +int main(int argc, char *argv[])
> > > > +{
> > > > +     struct stat stbuf;
> > > > +     int fd;
> > > > +     long pagesize;
> > > > +     void *buf;
> > > > +     ssize_t ret;
> > > > +
> > > > +     if (argc != 2) {
> > > > +             fprintf(stderr, "Use: %s <file path>\n", argv[0]);
> > > > +             return 1;
> > > > +     }
> > > > +
> > > > +     /*
> > > > +      * First try an append write against an empty file of a buffer with a
> > > > +      * size matching the page size. The buffer is not faulted in before
> > > > +      * attempting the write.
> > > > +      */
> > > > +
> > > > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > > > +     if (fd == -1) {
> > > > +             perror("Failed to open/create file");
> > > > +             return 2;
> > > > +     }
> > > > +
> > > > +     pagesize = sysconf(_SC_PAGE_SIZE);
> > > > +     if (pagesize == -1) {
> > > > +             perror("Failed to get page size");
> > > > +             return 3;
> > > > +     }
> > > > +
> > > > +     buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
> > > > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > > > +     if (buf == MAP_FAILED) {
> > > > +             perror("Failed to allocate first buffer");
> > > > +             return 4;
> > > > +     }
> > > > +
> > > > +     ret = write(fd, buf, pagesize);
> > > > +     if (ret < 0) {
> > > > +             perror("First write failed");
> > > > +             return 5;
> > > > +     }
> > > > +
> > > > +     ret = fstat(fd, &stbuf);
> > > > +     if (ret < 0) {
> > > > +             perror("First stat failed");
> > > > +             return 6;
> > > > +     }
> > > > +
> > > > +     if (stbuf.st_size != pagesize) {
> > > > +             fprintf(stderr,
> > > > +                     "Wrong file size after first write, got %jd expected %ld\n",
> > > > +                     (intmax_t)stbuf.st_size, pagesize);
> > > > +             return 7;
> > > > +     }
> > > > +
> > > > +     munmap(buf, pagesize);
> > > > +     close(fd);
> > > > +
> > > > +     /*
> > > > +      * Now try an append write against an empty file of a buffer with a
> > > > +      * size matching twice the page size. Only the first page of the buffer
> > > > +      * is faulted in before attempting the write, so that the second page
> > > > +      * should be faulted in during the write.
> > > > +      */
> > > > +     fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
> > > > +     if (fd == -1) {
> > > > +             perror("Failed to open/create file");
> > > > +             return 8;
> > > > +     }
> > > > +
> > > > +     buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE,
> > > > +                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > > > +     if (buf == MAP_FAILED) {
> > > > +             perror("Failed to allocate second buffer");
> > > > +             return 9;
> > > > +     }
> > > > +
> > > > +     /* Fault in first page of the buffer before the write. */
> > > > +     memset(buf, 0, 1);
> > > > +
> > > > +     ret = write(fd, buf, pagesize * 2);
> > > > +     if (ret < 0) {
> > > > +             perror("Second write failed");
> > >
> > > Hi Filipe,
> > >
> > > This patch looks good to me, just a question about this part. Is it possible
> > > to get (0 < ret < pagesize * 2) at here? Is so, should we report fail too?
> >
> > It is possible, if a short write happens.
> > If that's the case, we detect the failure below when checking the file
> > size with the stat call.
> >
> > >
> > > > +             return 10;
> > > > +     }
> > > > +
> > > > +     ret = fstat(fd, &stbuf);
> > > > +     if (ret < 0) {
> > > > +             perror("Second stat failed");
> > > > +             return 11;
> > > > +     }
> > > > +
> > > > +     if (stbuf.st_size != pagesize * 2) {
> > > > +             fprintf(stderr,
> > > > +                     "Wrong file size after second write, got %jd expected %ld\n",
> > > > +                     (intmax_t)stbuf.st_size, pagesize * 2);
> > >
> > > Does this try to check the stbuf.st_size isn't equal to the write(2) return
> > > value? Or checks stbuf.st_size != pagesize * 2, when the return value is
> > > good (equal to pagesize * 2) ?
> >
> > It checks if it is equals to pagesize * 2, which is supposed to be the
> > final file size, meaning the write succeeded and wrote all the
> > expected data (pagesize * 2).
>
> Thanks for your explanation.
>
> I noticed that the "Wrong file size after second write, got %jd expected %ld\n"
> line means the bug is triggered:

Correct.

>
>   # ./check -s default generic/362
>   SECTION       -- default
>   FSTYP         -- btrfs
>   PLATFORM      -- Linux/x86_64 dell-xxxxx-xxx 6.10.0-0.rc7.20240712git43db1e03c086.62.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Jul 12 22:31:14 UTC 2024
>   MKFS_OPTIONS  -- /dev/sda6
>   MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda6 /mnt/scratch
>
>   generic/362 1s ... - output mismatch (see /root/git/xfstests/results//default/generic/362.out.bad)
>       --- tests/generic/362.out   2024-07-27 01:38:47.810847933 +0800
>       +++ /root/git/xfstests/results//default/generic/362.out.bad 2024-07-27 01:41:50.126428012 +0800
>       @@ -1,2 +1,3 @@
>        QA output created by 362
>       +Wrong file size after first write, got 8192 expected 4096

Yes, that's expected with unpatched btrfs.

>        Silence is golden
>       ...
>       (Run 'diff -u /root/git/xfstests/tests/generic/362.out /root/git/xfstests/results//default/generic/362.out.bad'  to see the entire diff)
>
>   HINT: You _MAY_ be missing kernel fix:
>         xxxxxxxxxxxx btrfs: fix corruption after buffer fault in during direct IO append write
>
>   Ran: generic/362
>   Failures: generic/362
>   Failed 1 of 1 tests
>
> I thought a "short write" isn't a bug, just a rare test failure (or we use a loop
> write to avoid that?). So we might can make sure the write() returns "pagesize * 2"
> at first, then check (stbuf.st_size != pagesize * 2) for the bug itself.
>
> What do you think?

Fine ok.

I've just sent a v2 with that change plus a minor one to always let
the second test run even if the first one fails:

https://lore.kernel.org/linux-btrfs/6c52fe9ce75354a931afdc6d2f7fb638c7f06b00.1722079321.git.fdmanana@suse.com/

Thanks.

>
> Thanks,
> Zorro
>
> >
> > Thanks.
> >
> >
> > >
> > > Thanks,
> > > Zorro
> > >
> > > > +             return 12;
> > > > +     }
> > > > +
> > > > +     munmap(buf, pagesize * 2);
> > > > +     close(fd);
> > > > +
> > > > +     return 0;
> > > > +}
> > >
> > > [snip]
> > >
> >
>
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index b5f15162..97c7e001 100644
--- a/.gitignore
+++ b/.gitignore
@@ -72,6 +72,7 @@  tags
 /src/deduperace
 /src/detached_mounts_propagation
 /src/devzero
+/src/dio-append-buf-fault
 /src/dio-buf-fault
 /src/dio-interleaved
 /src/dio-invalidate-cache
diff --git a/src/Makefile b/src/Makefile
index 99796137..559209be 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -20,7 +20,7 @@  TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
 	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
 	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
 	t_mmap_cow_memory_failure fake-dump-rootino dio-buf-fault rewinddir-test \
-	readdir-while-renames
+	readdir-while-renames dio-append-buf-fault
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/dio-append-buf-fault.c b/src/dio-append-buf-fault.c
new file mode 100644
index 00000000..f4be4845
--- /dev/null
+++ b/src/dio-append-buf-fault.c
@@ -0,0 +1,131 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 SUSE Linux Products GmbH.  All Rights Reserved.
+ */
+
+/*
+ * Test a direct IO write in append mode with a buffer that was not faulted in
+ * (or just partially) before the write.
+ */
+
+/* Get the O_DIRECT definition. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+/* Returns 0 on success, or a distinct code (1-12) for the failed step. */
+int main(int argc, char *argv[])
+{
+	struct stat stbuf;
+	int fd;
+	long pagesize;
+	void *buf;
+	ssize_t ret;
+
+	if (argc != 2) {
+		fprintf(stderr, "Use: %s <file path>\n", argv[0]);
+		return 1;
+	}
+
+	/*
+	 * First append a page sized buffer, not faulted in beforehand, to an
+	 * empty file. A short write is reported apart from a wrong file size.
+	 */
+	fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
+	if (fd == -1) {
+		perror("Failed to open/create file");
+		return 2;
+	}
+
+	pagesize = sysconf(_SC_PAGE_SIZE);
+	if (pagesize == -1) {
+		perror("Failed to get page size");
+		return 3;
+	}
+
+	buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf == MAP_FAILED) {
+		perror("Failed to allocate first buffer");
+		return 4;
+	}
+
+	ret = write(fd, buf, pagesize);
+	if (ret != pagesize) {
+		fprintf(stderr, "First write failed: %s\n",
+			ret < 0 ? strerror(errno) : "short write");
+		return 5;
+	}
+
+	ret = fstat(fd, &stbuf);
+	if (ret < 0) {
+		perror("First stat failed");
+		return 6;
+	}
+
+	if (stbuf.st_size != pagesize) {
+		fprintf(stderr,
+			"Wrong file size after first write, got %jd expected %ld\n",
+			(intmax_t)stbuf.st_size, pagesize);
+		return 7;
+	}
+
+	munmap(buf, pagesize);
+	close(fd);
+
+	/*
+	 * Now append a buffer of twice the page size, again to an empty file
+	 * (O_TRUNC). Only the first page of the buffer is faulted in before
+	 * the write, so the second page has to be faulted in during the write.
+	 */
+	fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_APPEND, 0666);
+	if (fd == -1) {
+		perror("Failed to open/create file");
+		return 8;
+	}
+
+	buf = mmap(NULL, pagesize * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf == MAP_FAILED) {
+		perror("Failed to allocate second buffer");
+		return 9;
+	}
+
+	/* Fault in first page of the buffer before the write. */
+	memset(buf, 0, 1);
+
+	ret = write(fd, buf, pagesize * 2);
+	if (ret != pagesize * 2) {
+		fprintf(stderr, "Second write failed: %s\n",
+			ret < 0 ? strerror(errno) : "short write");
+		return 10;
+	}
+
+	ret = fstat(fd, &stbuf);
+	if (ret < 0) {
+		perror("Second stat failed");
+		return 11;
+	}
+
+	if (stbuf.st_size != pagesize * 2) {
+		fprintf(stderr,
+			"Wrong file size after second write, got %jd expected %ld\n",
+			(intmax_t)stbuf.st_size, pagesize * 2);
+		return 12;
+	}
+
+	munmap(buf, pagesize * 2);
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/generic/362 b/tests/generic/362
new file mode 100755
index 00000000..2c127347
--- /dev/null
+++ b/tests/generic/362
@@ -0,0 +1,28 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2024 SUSE Linux Products GmbH. All Rights Reserved.
+#
+# FS QA Test 362
+#
+# Test that doing a direct IO append write to a file when the input buffer was
+# not yet faulted in, does not result in an incorrect file size.
+#
+. ./common/preamble
+_begin_fstest auto quick
+
+_require_test
+_require_odirect
+_require_test_program dio-append-buf-fault
+
+[ $FSTYP == "btrfs" ] && \
+	_fixed_by_kernel_commit xxxxxxxxxxxx \
+	"btrfs: fix corruption after buffer fault in during direct IO append write"
+
+# The test program prints nothing on success. On error it writes to stderr,
+# which causes a golden output mismatch and makes the test fail.
+$here/src/dio-append-buf-fault $TEST_DIR/dio-append-buf-fault
+
+# success, all done
+echo "Silence is golden"
+status=0
+exit
diff --git a/tests/generic/362.out b/tests/generic/362.out
new file mode 100644
index 00000000..0ff40905
--- /dev/null
+++ b/tests/generic/362.out
@@ -0,0 +1,2 @@ 
+QA output created by 362
+Silence is golden