Message ID | 20250107160617.222775-2-axboe@kernel.dk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add RWF_DONTCACHE support | expand |
On Tue, Jan 07, 2025 at 09:05:14AM -0700, Jens Axboe wrote: > Using RWF_DONTCACHE tells the kernel that any page cache instantiated > by this operation should get pruned once the operation completes. If > data is in cache prior to the operation it will remain there. > > Add ops for testing both the read and write side of this. If the kernel > being tested doesn't support RWF_DONTCACHE, then operations are performed > with regular read/write buffered IO. > > See the kernel posting adding support: > > https://lore.kernel.org/linux-fsdevel/20241220154831.1086649-1-axboe@kernel.dk/ > > Signed-off-by: Jens Axboe <axboe@kernel.dk> Looks good to me now, Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> --D > --- > ltp/fsstress.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 144 insertions(+) > > diff --git a/ltp/fsstress.c b/ltp/fsstress.c > index 3d248ee25791..c5554d651b67 100644 > --- a/ltp/fsstress.c > +++ b/ltp/fsstress.c > @@ -82,6 +82,16 @@ static int renameat2(int dfd1, const char *path1, > #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ > #endif > > +#ifndef RWF_DONTCACHE > +#define RWF_DONTCACHE 0x80 > +#endif > + > +/* > + * Default to RWF_DONTCACHE being available, turn it off if -EOPNOTSUPP > + * is seen during issue. > + */ > +static int have_rwf_dontcache = 1; > + > #define FILELEN_MAX (32*4096) > > typedef enum { > @@ -117,6 +127,7 @@ typedef enum { > OP_COLLAPSE, > OP_INSERT, > OP_READ, > + OP_READ_DONTCACHE, > OP_READLINK, > OP_READV, > OP_REMOVEFATTR, > @@ -143,6 +154,7 @@ typedef enum { > OP_URING_READ, > OP_URING_WRITE, > OP_WRITE, > + OP_WRITE_DONTCACHE, > OP_WRITEV, > OP_EXCHANGE_RANGE, > OP_LAST > @@ -248,6 +260,7 @@ void zero_f(opnum_t, long); > void collapse_f(opnum_t, long); > void insert_f(opnum_t, long); > void unshare_f(opnum_t, long); > +void read_dontcache_f(opnum_t, long); > void read_f(opnum_t, long); > void readlink_f(opnum_t, long); > void readv_f(opnum_t, long); > @@ -273,6 +286,7 @@ void unlink_f(opnum_t, long); > void unresvsp_f(opnum_t, long); > void uring_read_f(opnum_t, long); > void uring_write_f(opnum_t, long); > +void write_dontcache_f(opnum_t, long); > void write_f(opnum_t, long); > void writev_f(opnum_t, long); > void exchangerange_f(opnum_t, long); > @@ -315,6 +329,7 @@ struct opdesc ops[OP_LAST] = { > [OP_COLLAPSE] = {"collapse", collapse_f, 1, 1 }, > [OP_INSERT] = {"insert", insert_f, 1, 1 }, > [OP_READ] = {"read", read_f, 1, 0 }, > + [OP_READ_DONTCACHE] = {"read_dontcache", read_dontcache_f, 1, 0 }, > [OP_READLINK] = {"readlink", readlink_f, 1, 0 }, > [OP_READV] = {"readv", readv_f, 1, 0 }, > /* remove (delete) extended attribute */ > @@ -346,6 +361,7 @@ struct opdesc ops[OP_LAST] = { > [OP_URING_WRITE] = {"uring_write", uring_write_f, 1, 1 }, > [OP_WRITE] = {"write", write_f, 4, 1 }, > [OP_WRITEV] = {"writev", writev_f, 4, 1 }, > + [OP_WRITE_DONTCACHE]= {"write_dontcache", write_dontcache_f,4, 1 }, > [OP_EXCHANGE_RANGE]= {"exchangerange", exchangerange_f, 2, 1 }, > }, *ops_end; > > @@ -4635,6 +4651,73 @@ readv_f(opnum_t opno, long r) > close(fd); > } > > +void > +read_dontcache_f(opnum_t opno, long r) > +{ > + int e; > + pathname_t f; > + int fd; > + int64_t lr; > + off64_t off; > + struct stat64 stb; > + int v; > + char st[1024]; > + struct iovec iov; > + int flags; > + > + init_pathname(&f); > + if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) { > + if (v) > + printf("%d/%lld: read - no filename\n", procid, opno); > + free_pathname(&f); > + return; > + } > + fd = open_path(&f, O_RDONLY); > + e = fd < 0 ? errno : 0; > + check_cwd(); > + if (fd < 0) { > + if (v) > + printf("%d/%lld: read - open %s failed %d\n", > + procid, opno, f.path, e); > + free_pathname(&f); > + return; > + } > + if (fstat64(fd, &stb) < 0) { > + if (v) > + printf("%d/%lld: read - fstat64 %s failed %d\n", > + procid, opno, f.path, errno); > + free_pathname(&f); > + close(fd); > + return; > + } > + inode_info(st, sizeof(st), &stb, v); > + if (stb.st_size == 0) { > + if (v) > + printf("%d/%lld: read - %s%s zero size\n", procid, opno, > + f.path, st); > + free_pathname(&f); > + close(fd); > + return; > + } > + lr = ((int64_t)random() << 32) + random(); > + off = (off64_t)(lr % stb.st_size); > + iov.iov_len = (random() % FILELEN_MAX) + 1; > + iov.iov_base = malloc(iov.iov_len); > + flags = have_rwf_dontcache ? RWF_DONTCACHE : 0; > + e = preadv2(fd, &iov, 1, off, flags) < 0 ? errno : 0; > + if (have_rwf_dontcache && e == EOPNOTSUPP) { > + have_rwf_dontcache = 0; > + e = preadv2(fd, &iov, 1, off, 0) < 0 ? errno : 0; > + } > + free(iov.iov_base); > + if (v) > + printf("%d/%lld: read dontcache %s%s [%lld,%d] %d\n", > + procid, opno, f.path, st, (long long)off, > + (int)iov.iov_len, e); > + free_pathname(&f); > + close(fd); > +} > + > void > removefattr_f(opnum_t opno, long r) > { > @@ -5509,6 +5592,67 @@ writev_f(opnum_t opno, long r) > close(fd); > } > > +void > +write_dontcache_f(opnum_t opno, long r) > +{ > + int e; > + pathname_t f; > + int fd; > + int64_t lr; > + off64_t off; > + struct stat64 stb; > + int v; > + char st[1024]; > + struct iovec iov; > + int flags; > + > + init_pathname(&f); > + if (!get_fname(FT_REGm, r, &f, NULL, NULL, &v)) { > + if (v) > + printf("%d/%lld: write - no filename\n", procid, opno); > + free_pathname(&f); > + return; > + } > + fd = open_path(&f, O_WRONLY); > + e = fd < 0 ? errno : 0; > + check_cwd(); > + if (fd < 0) { > + if (v) > + printf("%d/%lld: write - open %s failed %d\n", > + procid, opno, f.path, e); > + free_pathname(&f); > + return; > + } > + if (fstat64(fd, &stb) < 0) { > + if (v) > + printf("%d/%lld: write - fstat64 %s failed %d\n", > + procid, opno, f.path, errno); > + free_pathname(&f); > + close(fd); > + return; > + } > + inode_info(st, sizeof(st), &stb, v); > + lr = ((int64_t)random() << 32) + random(); > + off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE)); > + off %= maxfsize; > + iov.iov_len = (random() % FILELEN_MAX) + 1; > + iov.iov_base = malloc(iov.iov_len); > + memset(iov.iov_base, nameseq & 0xff, iov.iov_len); > + flags = have_rwf_dontcache ? RWF_DONTCACHE : 0; > + e = pwritev2(fd, &iov, 1, off, flags) < 0 ? errno : 0; > + if (have_rwf_dontcache && e == EOPNOTSUPP) { > + have_rwf_dontcache = 0; > + e = pwritev2(fd, &iov, 1, off, 0) < 0 ? errno : 0; > + } > + free(iov.iov_base); > + if (v) > + printf("%d/%lld: write dontcache %s%s [%lld,%d] %d\n", > + procid, opno, f.path, st, (long long)off, > + (int)iov.iov_len, e); > + free_pathname(&f); > + close(fd); > +} > + > char * > xattr_flag_to_string(int flag) > { > -- > 2.47.1 > >
diff --git a/ltp/fsstress.c b/ltp/fsstress.c index 3d248ee25791..c5554d651b67 100644 --- a/ltp/fsstress.c +++ b/ltp/fsstress.c @@ -82,6 +82,16 @@ static int renameat2(int dfd1, const char *path1, #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ #endif +#ifndef RWF_DONTCACHE +#define RWF_DONTCACHE 0x80 +#endif + +/* + * Default to RWF_DONTCACHE being available, turn it off if -EOPNOTSUPP + * is seen during issue. + */ +static int have_rwf_dontcache = 1; + #define FILELEN_MAX (32*4096) typedef enum { @@ -117,6 +127,7 @@ typedef enum { OP_COLLAPSE, OP_INSERT, OP_READ, + OP_READ_DONTCACHE, OP_READLINK, OP_READV, OP_REMOVEFATTR, @@ -143,6 +154,7 @@ typedef enum { OP_URING_READ, OP_URING_WRITE, OP_WRITE, + OP_WRITE_DONTCACHE, OP_WRITEV, OP_EXCHANGE_RANGE, OP_LAST @@ -248,6 +260,7 @@ void zero_f(opnum_t, long); void collapse_f(opnum_t, long); void insert_f(opnum_t, long); void unshare_f(opnum_t, long); +void read_dontcache_f(opnum_t, long); void read_f(opnum_t, long); void readlink_f(opnum_t, long); void readv_f(opnum_t, long); @@ -273,6 +286,7 @@ void unlink_f(opnum_t, long); void unresvsp_f(opnum_t, long); void uring_read_f(opnum_t, long); void uring_write_f(opnum_t, long); +void write_dontcache_f(opnum_t, long); void write_f(opnum_t, long); void writev_f(opnum_t, long); void exchangerange_f(opnum_t, long); @@ -315,6 +329,7 @@ struct opdesc ops[OP_LAST] = { [OP_COLLAPSE] = {"collapse", collapse_f, 1, 1 }, [OP_INSERT] = {"insert", insert_f, 1, 1 }, [OP_READ] = {"read", read_f, 1, 0 }, + [OP_READ_DONTCACHE] = {"read_dontcache", read_dontcache_f, 1, 0 }, [OP_READLINK] = {"readlink", readlink_f, 1, 0 }, [OP_READV] = {"readv", readv_f, 1, 0 }, /* remove (delete) extended attribute */ @@ -346,6 +361,7 @@ struct opdesc ops[OP_LAST] = { [OP_URING_WRITE] = {"uring_write", uring_write_f, 1, 1 }, [OP_WRITE] = {"write", write_f, 4, 1 }, [OP_WRITEV] = {"writev", writev_f, 4, 1 }, + [OP_WRITE_DONTCACHE]= {"write_dontcache", write_dontcache_f,4, 1 }, [OP_EXCHANGE_RANGE]= {"exchangerange", exchangerange_f, 2, 1 }, }, *ops_end; @@ -4635,6 +4651,73 @@ readv_f(opnum_t opno, long r) close(fd); } +void +read_dontcache_f(opnum_t opno, long r) +{ + int e; + pathname_t f; + int fd; + int64_t lr; + off64_t off; + struct stat64 stb; + int v; + char st[1024]; + struct iovec iov; + int flags; + + init_pathname(&f); + if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) { + if (v) + printf("%d/%lld: read - no filename\n", procid, opno); + free_pathname(&f); + return; + } + fd = open_path(&f, O_RDONLY); + e = fd < 0 ? errno : 0; + check_cwd(); + if (fd < 0) { + if (v) + printf("%d/%lld: read - open %s failed %d\n", + procid, opno, f.path, e); + free_pathname(&f); + return; + } + if (fstat64(fd, &stb) < 0) { + if (v) + printf("%d/%lld: read - fstat64 %s failed %d\n", + procid, opno, f.path, errno); + free_pathname(&f); + close(fd); + return; + } + inode_info(st, sizeof(st), &stb, v); + if (stb.st_size == 0) { + if (v) + printf("%d/%lld: read - %s%s zero size\n", procid, opno, + f.path, st); + free_pathname(&f); + close(fd); + return; + } + lr = ((int64_t)random() << 32) + random(); + off = (off64_t)(lr % stb.st_size); + iov.iov_len = (random() % FILELEN_MAX) + 1; + iov.iov_base = malloc(iov.iov_len); + flags = have_rwf_dontcache ? RWF_DONTCACHE : 0; + e = preadv2(fd, &iov, 1, off, flags) < 0 ? errno : 0; + if (have_rwf_dontcache && e == EOPNOTSUPP) { + have_rwf_dontcache = 0; + e = preadv2(fd, &iov, 1, off, 0) < 0 ? errno : 0; + } + free(iov.iov_base); + if (v) + printf("%d/%lld: read dontcache %s%s [%lld,%d] %d\n", + procid, opno, f.path, st, (long long)off, + (int)iov.iov_len, e); + free_pathname(&f); + close(fd); +} + void removefattr_f(opnum_t opno, long r) { @@ -5509,6 +5592,67 @@ writev_f(opnum_t opno, long r) close(fd); } +void +write_dontcache_f(opnum_t opno, long r) +{ + int e; + pathname_t f; + int fd; + int64_t lr; + off64_t off; + struct stat64 stb; + int v; + char st[1024]; + struct iovec iov; + int flags; + + init_pathname(&f); + if (!get_fname(FT_REGm, r, &f, NULL, NULL, &v)) { + if (v) + printf("%d/%lld: write - no filename\n", procid, opno); + free_pathname(&f); + return; + } + fd = open_path(&f, O_WRONLY); + e = fd < 0 ? errno : 0; + check_cwd(); + if (fd < 0) { + if (v) + printf("%d/%lld: write - open %s failed %d\n", + procid, opno, f.path, e); + free_pathname(&f); + return; + } + if (fstat64(fd, &stb) < 0) { + if (v) + printf("%d/%lld: write - fstat64 %s failed %d\n", + procid, opno, f.path, errno); + free_pathname(&f); + close(fd); + return; + } + inode_info(st, sizeof(st), &stb, v); + lr = ((int64_t)random() << 32) + random(); + off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE)); + off %= maxfsize; + iov.iov_len = (random() % FILELEN_MAX) + 1; + iov.iov_base = malloc(iov.iov_len); + memset(iov.iov_base, nameseq & 0xff, iov.iov_len); + flags = have_rwf_dontcache ? RWF_DONTCACHE : 0; + e = pwritev2(fd, &iov, 1, off, flags) < 0 ? errno : 0; + if (have_rwf_dontcache && e == EOPNOTSUPP) { + have_rwf_dontcache = 0; + e = pwritev2(fd, &iov, 1, off, 0) < 0 ? errno : 0; + } + free(iov.iov_base); + if (v) + printf("%d/%lld: write dontcache %s%s [%lld,%d] %d\n", + procid, opno, f.path, st, (long long)off, + (int)iov.iov_len, e); + free_pathname(&f); + close(fd); +} + char * xattr_flag_to_string(int flag) {
Using RWF_DONTCACHE tells the kernel that any page cache instantiated by this operation should get pruned once the operation completes. If data is in cache prior to the operation it will remain there. Add ops for testing both the read and write side of this. If the kernel being tested doesn't support RWF_DONTCACHE, then operations are performed with regular read/write buffered IO. See the kernel posting adding support: https://lore.kernel.org/linux-fsdevel/20241220154831.1086649-1-axboe@kernel.dk/ Signed-off-by: Jens Axboe <axboe@kernel.dk> --- ltp/fsstress.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+)