diff mbox

[v2] generic: test pagecache invalidation after direct write

Message ID 20170321165314.11434-1-eguan@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eryu Guan March 21, 2017, 4:53 p.m. UTC
Test if direct write invalidates pagecache correctly, so that subsequent
buffer read reads the correct data from disk.

This test is inspired by LTP tests dio29, and serves as a regression
test for the bug found by it, see kernel commit c771c14baa33
("iomap: invalidate page caches should be after iomap_dio_complete()
in direct write").

The test can be easily expanded to other write/read combinations, e.g.
buffer write + direct read and direct write + direct read, so they are
also being tested.

Signed-off-by: Eryu Guan <eguan@redhat.com>
---
v2: Address Brian's review comments
- compare buffer content byte-by-byte instead of strncmp
- use 'pids[i]' not *(pids + 1)
- dump buffer content to stdout on error
- initialize write buffer with (i + 1)
- use pwrite/pread instead of lseek+write/read
- remove increment of unused 'ret'
- call fsync(fd) instead of sync()
- fix typos

 .gitignore                 |   1 +
 src/Makefile               |   3 +-
 src/dio-invalidate-cache.c | 326 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/418          | 122 +++++++++++++++++
 tests/generic/418.out      |   2 +
 tests/generic/group        |   1 +
 6 files changed, 454 insertions(+), 1 deletion(-)
 create mode 100644 src/dio-invalidate-cache.c
 create mode 100755 tests/generic/418
 create mode 100644 tests/generic/418.out

Comments

Brian Foster March 21, 2017, 5:35 p.m. UTC | #1
On Wed, Mar 22, 2017 at 12:53:14AM +0800, Eryu Guan wrote:
> Test if direct write invalidates pagecache correctly, so that subsequent
> buffer read reads the correct data from disk.
> 
> This test is inspired by LTP tests dio29, and serves as a regression
> test for the bug found by it, see kernel commit c771c14baa33
> ("iomap: invalidate page caches should be after iomap_dio_complete()
> in direct write").
> 
> The test can be easily expanded to other write/read combinations, e.g.
> buffer write + direct read and direct write + direct read, so they are
> also being tested.
> 
> Signed-off-by: Eryu Guan <eguan@redhat.com>
> ---
> v2: Address Brian's review comments
> - compare buffer content byte-by-byte instead of strncmp
> - use 'pids[i]' not *(pids + 1)
> - dump buffer content to stdout on error
> - initialize write buffer with (i + 1)
> - use pwrite/pread instead of lseek+write/read
> - remove increment of unused 'ret'
> - call fsync(fd) instead of sync()
> - fix typos
> 
>  .gitignore                 |   1 +
>  src/Makefile               |   3 +-
>  src/dio-invalidate-cache.c | 326 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/418          | 122 +++++++++++++++++
>  tests/generic/418.out      |   2 +
>  tests/generic/group        |   1 +
>  6 files changed, 454 insertions(+), 1 deletion(-)
>  create mode 100644 src/dio-invalidate-cache.c
>  create mode 100755 tests/generic/418
>  create mode 100644 tests/generic/418.out
> 
...
> diff --git a/src/dio-invalidate-cache.c b/src/dio-invalidate-cache.c
> new file mode 100644
> index 0000000..bc795f9
> --- /dev/null
> +++ b/src/dio-invalidate-cache.c
> @@ -0,0 +1,326 @@
...
> +static void kill_children(pid_t *pids, int nr_child)
> +{
> +	int i;
> +	pid_t pid;
> +
> +	for (i = 0; i < nr_child; i++) {
> +		pid = *(pids + i);
> +		if (pid == 0)
> +			continue;
> +		kill(pid, SIGTERM);
> +	}
> +	return;

Still have pids pointer arithmetic above and the return is unnecessary.
Otherwise looks Ok to me:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> +}
> +
> +static int wait_children(pid_t *pids, int nr_child)
> +{
> +	int i, status, ret = 0;
> +	pid_t pid;
> +
> +	for (i = 0; i < nr_child; i++) {
> +		pid = pids[i];
> +		if (pid == 0)
> +			continue;
> +		waitpid(pid, &status, 0);
> +		ret += WEXITSTATUS(status);
> +	}
> +	return ret;
> +}
> +
> +static void dumpbuf(char *buf, int size, int blksz)
> +{
> +	int i;
> +
> +	printf("dumping buffer content\n");
> +	for (i = 0; i < size; i++) {
> +		if (((i % blksz) == 0) || ((i % 64) == 0))
> +			putchar('\n');
> +		printf("%x", buf[i]);
> +	}
> +	putchar('\n');
> +}
> +
> +static int run_test(const char *filename, int n_child, int blksz, off_t offset,
> +		    int nr_iter, int flag_rd, int flag_wr)
> +{
> +	char *buf_rd;
> +	char *buf_wr;
> +	off_t seekoff;
> +	int fd_rd, fd_wr;
> +	int i, ret;
> +	long page_size;
> +
> +	seekoff = offset + blksz * n_child;
> +
> +	page_size = sysconf(_SC_PAGESIZE);
> +	ret = posix_memalign((void **)&buf_rd, (size_t)page_size,
> +		blksz > page_size ? blksz : (size_t)page_size);
> +	if (ret) {
> +		fprintf(stderr, "posix_memalign(buf_rd, %d, %d) failed: %d\n",
> +			blksz, blksz, ret);
> +		exit(EXIT_FAILURE);
> +	}
> +	memset(buf_rd, 0, blksz);
> +	ret = posix_memalign((void **)&buf_wr, (size_t)page_size,
> +		blksz > page_size ? blksz : (size_t)page_size);
> +	if (ret) {
> +		fprintf(stderr, "posix_memalign(buf_wr, %d, %d) failed: %d\n",
> +			blksz, blksz, ret);
> +		exit(EXIT_FAILURE);
> +	}
> +	memset(buf_wr, 0, blksz);
> +
> +	fd_rd = open(filename, flag_rd);
> +	if (fd_rd < 0) {
> +		perror("open readonly for read");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	fd_wr = open(filename, flag_wr);
> +	if (fd_wr < 0) {
> +		perror("open writeonly for direct write");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +#define log(format, ...) 			\
> +	if (verbose) {				\
> +		printf("[%d:%d] ", n_child, i);	\
> +		printf(format, __VA_ARGS__);	\
> +	}
> +
> +
> +	/* seek, write, read and verify */
> +	for (i = 0; i < nr_iter; i++) {
> +		memset(buf_wr, i + 1, blksz);
> +		log("pwrite(fd_wr, %p, %d, %lu)\n", buf_wr, blksz, seekoff);
> +		if (pwrite(fd_wr, buf_wr, blksz, seekoff) != blksz) {
> +			perror("direct write");
> +			exit(EXIT_FAILURE);
> +		}
> +
> +		/* make sure buffer write hits disk before direct read */
> +		if (!(flag_wr & O_DIRECT)) {
> +			if (fsync(fd_wr) < 0) {
> +				perror("fsync(fd_wr)");
> +				exit(EXIT_FAILURE);
> +			}
> +		}
> +
> +		log("pread(fd_rd, %p, %d, %lu)\n", buf_rd, blksz, seekoff);
> +		if (pread(fd_rd, buf_rd, blksz, seekoff) != blksz) {
> +			perror("buffer read");
> +			exit(EXIT_FAILURE);
> +		}
> +		if (cmpbuf(buf_wr, buf_rd, blksz) != 0) {
> +			fprintf(stderr, "[%d:%d] FAIL - comparison failed, "
> +				"offset %d\n", n_child, i, (int)seekoff);
> +			if (verbose)
> +				dumpbuf(buf_rd, blksz, blksz);
> +			exit(EXIT_FAILURE);
> +		}
> +	}
> +	exit(EXIT_SUCCESS);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int nr_iter = 1;
> +	int nr_child = 1;
> +	int blksz = DEF_BLKSZ;
> +	int fd, i, ret = 0;
> +	int flag_rd = O_RDONLY;
> +	int flag_wr = O_WRONLY;
> +	int do_trunc = 0;
> +	int pre_fill = 0;
> +	int pre_alloc = 0;
> +	pid_t pid;
> +	pid_t *pids;
> +	off_t offset = 0;
> +	char *filename = NULL;
> +
> +	while ((i = getopt(argc, argv, "b:i:n:f:Fpo:tvrw")) != -1) {
> +		switch (i) {
> +		case 'b':
> +			if ((blksz = atoi(optarg)) <= 0) {
> +				fprintf(stderr, "blksz must be > 0\n");
> +				exit(EXIT_FAILURE);
> +			}
> +			if (blksz % 512 != 0) {
> +				fprintf(stderr, "blksz must be multiple of 512\n");
> +				exit(EXIT_FAILURE);
> +			}
> +			break;
> +		case 'i':
> +			if ((nr_iter = atoi(optarg)) <= 0) {
> +				fprintf(stderr, "iterations must be > 0\n");
> +				exit(EXIT_FAILURE);
> +			}
> +			break;
> +		case 'n':
> +			if ((nr_child = atoi(optarg)) <= 0) {
> +				fprintf(stderr, "no of children must be > 0\n");
> +				exit(EXIT_FAILURE);
> +			}
> +			break;
> +		case 'f':
> +			filename = optarg;
> +			break;
> +		case 'F':
> +			pre_fill = 1;
> +			break;
> +		case 'p':
> +			pre_alloc = 1;
> +			break;
> +		case 'r':
> +			flag_rd |= O_DIRECT;
> +			break;
> +		case 'w':
> +			flag_wr |= O_DIRECT;
> +			break;
> +		case 't':
> +			do_trunc = 1;
> +			break;
> +		case 'o':
> +			if ((offset = atol(optarg)) < 0) {
> +				fprintf(stderr, "offset must be >= 0\n");
> +				exit(EXIT_FAILURE);
> +			}
> +			break;
> +		case 'v':
> +			verbose = 1;
> +			break;
> +		case 'h':	/* fall through */
> +		default:
> +			usage(argv[0]);
> +		}
> +	}
> +
> +	if (filename == NULL)
> +		usage(argv[0]);
> +	if (pre_fill && pre_alloc) {
> +		fprintf(stderr, "Error: -F and -p are both specified\n");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	pids = malloc(nr_child * sizeof(pid_t));
> +	if (!pids) {
> +		fprintf(stderr, "failed to malloc memory for pids\n");
> +		exit(EXIT_FAILURE);
> +	}
> +	memset(pids, 0, nr_child * sizeof(pid_t));
> +
> +	/* create & truncate testfile first */
> +	fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0600);
> +	if (fd < 0) {
> +		perror("create & truncate testfile");
> +		free(pids);
> +		exit(EXIT_FAILURE);
> +	}
> +	if (do_trunc && (ftruncate(fd, blksz * nr_child) < 0)) {
> +		perror("ftruncate failed");
> +		free(pids);
> +		exit(EXIT_FAILURE);
> +	}
> +	if (pre_fill) {
> +		char *buf;
> +		buf = malloc(blksz * nr_child);
> +		memset(buf, 's', blksz * nr_child);
> +		write(fd, buf, blksz * nr_child);
> +		free(buf);
> +	}
> +	if (pre_alloc) {
> +		fallocate(fd, 0, 0, blksz * nr_child);
> +	}
> +	fsync(fd);
> +	close(fd);
> +
> +	/* fork workers */
> +	for (i = 0; i < nr_child; i++) {
> +		pid = fork();
> +		if (pid < 0) {
> +			perror("fork");
> +			kill_children(pids, nr_child);
> +			free(pids);
> +			exit(EXIT_FAILURE);
> +		} else if (pid == 0) {
> +			/* never returns */
> +			run_test(filename, i, blksz, offset, nr_iter,
> +				 flag_rd, flag_wr);
> +		} else {
> +			pids[i] = pid;
> +		}
> +	}
> +
> +	ret = wait_children(pids, nr_child);
> +	free(pids);
> +	exit(ret);
> +}
> diff --git a/tests/generic/418 b/tests/generic/418
> new file mode 100755
> index 0000000..1fa782e
> --- /dev/null
> +++ b/tests/generic/418
> @@ -0,0 +1,122 @@
> +#! /bin/bash
> +# FS QA Test 418
> +#
> +# Test pagecache invalidation in buffer/direct write/read combination.
> +#
> +# Fork N children, each child writes to and reads from its own region of the
> +# same test file, and check if what it reads is what it writes. The test region
> +# is determined by N * blksz. Write and read operation can be either direct or
> +# buffered.
> +#
> +# Regression test for commit c771c14baa33 ("iomap: invalidate page caches
> +# should be after iomap_dio_complete() in direct write")
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2017 Red Hat Inc.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_test
> +_require_odirect
> +_require_block_device $TEST_DEV
> +_require_test_program "dio-invalidate-cache"
> +_require_test_program "feature"
> +
> +diotest=$here/src/dio-invalidate-cache
> +testfile=$TEST_DIR/$seq-diotest
> +sectorsize=`blockdev --getss $TEST_DEV`
> +pagesize=`src/feature -s`
> +
> +# test case array, test different write/read combinations
> +# -r: use direct read
> +# -w: use direct write
> +# -t: truncate file to final size before test, i.e. write to hole
> +# -p: fallocate whole file before test, i.e. write to allocated but unwritten extents
> +# -F: fulfill whole file before test, i.e. write to allocated & written extents
> +t_cases=(
> +	"-w"
> +	"-wt"
> +	"-wp"
> +	"-wF"
> +	"-r"
> +	"-rt"
> +	"-rp"
> +	"-rF"
> +	"-rw"
> +	"-rwt"
> +	"-rwp"
> +	"-rwF"
> +)
> +
> +runtest()
> +{
> +	local i=0
> +	local tc=""
> +	local loop=$1
> +	shift
> +
> +	for tc in ${t_cases[*]}; do
> +		echo "diotest $tc $*" >> $seqres.full
> +		i=0
> +		while [ $i -lt $loop ]; do
> +			$diotest $tc $* -f $testfile
> +			if [ $? -ne 0 ]; then
> +				echo "diotest $tc $* failed at loop $i" | \
> +					tee -a $seqres.full
> +				break
> +			fi
> +			let i=i+1
> +		done
> +	done
> +}
> +
> +while [ $sectorsize -le $((pagesize * 2)) ]; do
> +	# reproducer for the original bug
> +	runtest $((10 * LOAD_FACTOR)) -b $sectorsize -n 3 -i 1
> +	# try more processes and iterations
> +	runtest $((5 * LOAD_FACTOR))  -b $sectorsize -n 8 -i 4
> +	sectorsize=$((sectorsize * 2))
> +done
> +echo "Silence is golden"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/418.out b/tests/generic/418.out
> new file mode 100644
> index 0000000..954de31
> --- /dev/null
> +++ b/tests/generic/418.out
> @@ -0,0 +1,2 @@
> +QA output created by 418
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index f0096bb..0a272b7 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -420,3 +420,4 @@
>  415 auto clone
>  416 auto enospc
>  417 auto quick shutdown log
> +418 auto rw
> -- 
> 2.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/.gitignore b/.gitignore
index 48a40a0..1ed2a92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ 
 /src/dbtest
 /src/devzero
 /src/dio-interleaved
+/src/dio-invalidate-cache
 /src/dirperf
 /src/dirstress
 /src/dmiperf
diff --git a/src/Makefile b/src/Makefile
index eb5a56c..a7f27f0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -21,7 +21,8 @@  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
 	seek_copy_test t_readdir_1 t_readdir_2 fsync-tester nsexec cloner \
 	renameat2 t_getcwd e4compact test-nextquota punch-alternating \
-	attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type
+	attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
+	dio-invalidate-cache
 
 SUBDIRS =
 
diff --git a/src/dio-invalidate-cache.c b/src/dio-invalidate-cache.c
new file mode 100644
index 0000000..bc795f9
--- /dev/null
+++ b/src/dio-invalidate-cache.c
@@ -0,0 +1,326 @@ 
+/*
+ * Copyright (c) 2017 Red Hat Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/*
+ * Fork N children, each child writes to and reads from its own region of the
+ * same test file, and check if what it reads is what it writes. The test
+ * region is determined by N * blksz. Write and read operation can be either
+ * direct or buffered.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DEF_BLKSZ 4096
+
+int verbose = 0;
+
+static void usage(const char *prog)
+{
+	fprintf(stderr, "Usage: %s [-Fhptrwv] [-b blksz] [-n nr_child] [-i iterations] [-o offset] <-f filename>\n", prog);
+	fprintf(stderr, "\t-F\tPreallocate all blocks by writing them before test\n");
+	fprintf(stderr, "\t-p\tPreallocate all blocks using fallocate(2) before test\n");
+	fprintf(stderr, "\t-t\tTruncate test file to largest size before test\n");
+	fprintf(stderr, "\t-r\tDo direct read\n");
+	fprintf(stderr, "\t-w\tDo direct write\n");
+	fprintf(stderr, "\t-v\tBe verbose\n");
+	fprintf(stderr, "\t-h\tshow this help message\n");
+	exit(EXIT_FAILURE);
+}
+
+static int cmpbuf(char *b1, char *b2, int bsize)
+{
+	int i;
+
+	for (i = 0; i < bsize; i++) {
+		if (b1[i] != b2[i]) {
+			fprintf(stderr, "cmpbuf: offset %d: Expected: 0x%x,"
+				" got 0x%x\n", i, b1[i], b2[i]);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void kill_children(pid_t *pids, int nr_child)
+{
+	int i;
+	pid_t pid;
+
+	for (i = 0; i < nr_child; i++) {
+		pid = *(pids + i);
+		if (pid == 0)
+			continue;
+		kill(pid, SIGTERM);
+	}
+	return;
+}
+
+static int wait_children(pid_t *pids, int nr_child)
+{
+	int i, status, ret = 0;
+	pid_t pid;
+
+	for (i = 0; i < nr_child; i++) {
+		pid = pids[i];
+		if (pid == 0)
+			continue;
+		waitpid(pid, &status, 0);
+		ret += WEXITSTATUS(status);
+	}
+	return ret;
+}
+
+static void dumpbuf(char *buf, int size, int blksz)
+{
+	int i;
+
+	printf("dumping buffer content\n");
+	for (i = 0; i < size; i++) {
+		if (((i % blksz) == 0) || ((i % 64) == 0))
+			putchar('\n');
+		printf("%x", buf[i]);
+	}
+	putchar('\n');
+}
+
+static int run_test(const char *filename, int n_child, int blksz, off_t offset,
+		    int nr_iter, int flag_rd, int flag_wr)
+{
+	char *buf_rd;
+	char *buf_wr;
+	off_t seekoff;
+	int fd_rd, fd_wr;
+	int i, ret;
+	long page_size;
+
+	seekoff = offset + blksz * n_child;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	ret = posix_memalign((void **)&buf_rd, (size_t)page_size,
+		blksz > page_size ? blksz : (size_t)page_size);
+	if (ret) {
+		fprintf(stderr, "posix_memalign(buf_rd, %d, %d) failed: %d\n",
+			blksz, blksz, ret);
+		exit(EXIT_FAILURE);
+	}
+	memset(buf_rd, 0, blksz);
+	ret = posix_memalign((void **)&buf_wr, (size_t)page_size,
+		blksz > page_size ? blksz : (size_t)page_size);
+	if (ret) {
+		fprintf(stderr, "posix_memalign(buf_wr, %d, %d) failed: %d\n",
+			blksz, blksz, ret);
+		exit(EXIT_FAILURE);
+	}
+	memset(buf_wr, 0, blksz);
+
+	fd_rd = open(filename, flag_rd);
+	if (fd_rd < 0) {
+		perror("open readonly for read");
+		exit(EXIT_FAILURE);
+	}
+
+	fd_wr = open(filename, flag_wr);
+	if (fd_wr < 0) {
+		perror("open writeonly for direct write");
+		exit(EXIT_FAILURE);
+	}
+
+#define log(format, ...) 			\
+	if (verbose) {				\
+		printf("[%d:%d] ", n_child, i);	\
+		printf(format, __VA_ARGS__);	\
+	}
+
+
+	/* seek, write, read and verify */
+	for (i = 0; i < nr_iter; i++) {
+		memset(buf_wr, i + 1, blksz);
+		log("pwrite(fd_wr, %p, %d, %lu)\n", buf_wr, blksz, seekoff);
+		if (pwrite(fd_wr, buf_wr, blksz, seekoff) != blksz) {
+			perror("direct write");
+			exit(EXIT_FAILURE);
+		}
+
+		/* make sure buffer write hits disk before direct read */
+		if (!(flag_wr & O_DIRECT)) {
+			if (fsync(fd_wr) < 0) {
+				perror("fsync(fd_wr)");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		log("pread(fd_rd, %p, %d, %lu)\n", buf_rd, blksz, seekoff);
+		if (pread(fd_rd, buf_rd, blksz, seekoff) != blksz) {
+			perror("buffer read");
+			exit(EXIT_FAILURE);
+		}
+		if (cmpbuf(buf_wr, buf_rd, blksz) != 0) {
+			fprintf(stderr, "[%d:%d] FAIL - comparison failed, "
+				"offset %d\n", n_child, i, (int)seekoff);
+			if (verbose)
+				dumpbuf(buf_rd, blksz, blksz);
+			exit(EXIT_FAILURE);
+		}
+	}
+	exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[])
+{
+	int nr_iter = 1;
+	int nr_child = 1;
+	int blksz = DEF_BLKSZ;
+	int fd, i, ret = 0;
+	int flag_rd = O_RDONLY;
+	int flag_wr = O_WRONLY;
+	int do_trunc = 0;
+	int pre_fill = 0;
+	int pre_alloc = 0;
+	pid_t pid;
+	pid_t *pids;
+	off_t offset = 0;
+	char *filename = NULL;
+
+	while ((i = getopt(argc, argv, "b:i:n:f:Fpo:tvrw")) != -1) {
+		switch (i) {
+		case 'b':
+			if ((blksz = atoi(optarg)) <= 0) {
+				fprintf(stderr, "blksz must be > 0\n");
+				exit(EXIT_FAILURE);
+			}
+			if (blksz % 512 != 0) {
+				fprintf(stderr, "blksz must be multiple of 512\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'i':
+			if ((nr_iter = atoi(optarg)) <= 0) {
+				fprintf(stderr, "iterations must be > 0\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'n':
+			if ((nr_child = atoi(optarg)) <= 0) {
+				fprintf(stderr, "no of children must be > 0\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'f':
+			filename = optarg;
+			break;
+		case 'F':
+			pre_fill = 1;
+			break;
+		case 'p':
+			pre_alloc = 1;
+			break;
+		case 'r':
+			flag_rd |= O_DIRECT;
+			break;
+		case 'w':
+			flag_wr |= O_DIRECT;
+			break;
+		case 't':
+			do_trunc = 1;
+			break;
+		case 'o':
+			if ((offset = atol(optarg)) < 0) {
+				fprintf(stderr, "offset must be >= 0\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'h':	/* fall through */
+		default:
+			usage(argv[0]);
+		}
+	}
+
+	if (filename == NULL)
+		usage(argv[0]);
+	if (pre_fill && pre_alloc) {
+		fprintf(stderr, "Error: -F and -p are both specified\n");
+		exit(EXIT_FAILURE);
+	}
+
+	pids = malloc(nr_child * sizeof(pid_t));
+	if (!pids) {
+		fprintf(stderr, "failed to malloc memory for pids\n");
+		exit(EXIT_FAILURE);
+	}
+	memset(pids, 0, nr_child * sizeof(pid_t));
+
+	/* create & truncate testfile first */
+	fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0600);
+	if (fd < 0) {
+		perror("create & truncate testfile");
+		free(pids);
+		exit(EXIT_FAILURE);
+	}
+	if (do_trunc && (ftruncate(fd, blksz * nr_child) < 0)) {
+		perror("ftruncate failed");
+		free(pids);
+		exit(EXIT_FAILURE);
+	}
+	if (pre_fill) {
+		char *buf;
+		buf = malloc(blksz * nr_child);
+		memset(buf, 's', blksz * nr_child);
+		write(fd, buf, blksz * nr_child);
+		free(buf);
+	}
+	if (pre_alloc) {
+		fallocate(fd, 0, 0, blksz * nr_child);
+	}
+	fsync(fd);
+	close(fd);
+
+	/* fork workers */
+	for (i = 0; i < nr_child; i++) {
+		pid = fork();
+		if (pid < 0) {
+			perror("fork");
+			kill_children(pids, nr_child);
+			free(pids);
+			exit(EXIT_FAILURE);
+		} else if (pid == 0) {
+			/* never returns */
+			run_test(filename, i, blksz, offset, nr_iter,
+				 flag_rd, flag_wr);
+		} else {
+			pids[i] = pid;
+		}
+	}
+
+	ret = wait_children(pids, nr_child);
+	free(pids);
+	exit(ret);
+}
diff --git a/tests/generic/418 b/tests/generic/418
new file mode 100755
index 0000000..1fa782e
--- /dev/null
+++ b/tests/generic/418
@@ -0,0 +1,122 @@ 
+#! /bin/bash
+# FS QA Test 418
+#
+# Test pagecache invalidation in buffer/direct write/read combination.
+#
+# Fork N children, each child writes to and reads from its own region of the
+# same test file, and check if what it reads is what it writes. The test region
+# is determined by N * blksz. Write and read operation can be either direct or
+# buffered.
+#
+# Regression test for commit c771c14baa33 ("iomap: invalidate page caches
+# should be after iomap_dio_complete() in direct write")
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2017 Red Hat Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs generic
+_supported_os Linux
+_require_test
+_require_odirect
+_require_block_device $TEST_DEV
+_require_test_program "dio-invalidate-cache"
+_require_test_program "feature"
+
+diotest=$here/src/dio-invalidate-cache
+testfile=$TEST_DIR/$seq-diotest
+sectorsize=`blockdev --getss $TEST_DEV`
+pagesize=`src/feature -s`
+
+# test case array, test different write/read combinations
+# -r: use direct read
+# -w: use direct write
+# -t: truncate file to final size before test, i.e. write to hole
+# -p: fallocate whole file before test, i.e. write to allocated but unwritten extents
+# -F: fulfill whole file before test, i.e. write to allocated & written extents
+t_cases=(
+	"-w"
+	"-wt"
+	"-wp"
+	"-wF"
+	"-r"
+	"-rt"
+	"-rp"
+	"-rF"
+	"-rw"
+	"-rwt"
+	"-rwp"
+	"-rwF"
+)
+
+runtest()
+{
+	local i=0
+	local tc=""
+	local loop=$1
+	shift
+
+	for tc in ${t_cases[*]}; do
+		echo "diotest $tc $*" >> $seqres.full
+		i=0
+		while [ $i -lt $loop ]; do
+			$diotest $tc $* -f $testfile
+			if [ $? -ne 0 ]; then
+				echo "diotest $tc $* failed at loop $i" | \
+					tee -a $seqres.full
+				break
+			fi
+			let i=i+1
+		done
+	done
+}
+
+while [ $sectorsize -le $((pagesize * 2)) ]; do
+	# reproducer for the original bug
+	runtest $((10 * LOAD_FACTOR)) -b $sectorsize -n 3 -i 1
+	# try more processes and iterations
+	runtest $((5 * LOAD_FACTOR))  -b $sectorsize -n 8 -i 4
+	sectorsize=$((sectorsize * 2))
+done
+echo "Silence is golden"
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/418.out b/tests/generic/418.out
new file mode 100644
index 0000000..954de31
--- /dev/null
+++ b/tests/generic/418.out
@@ -0,0 +1,2 @@ 
+QA output created by 418
+Silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index f0096bb..0a272b7 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -420,3 +420,4 @@ 
 415 auto clone
 416 auto enospc
 417 auto quick shutdown log
+418 auto rw