[v2] BTRFS/NFSD: provide more unique inode number for btrfs export

Message ID	162969155423.9892.18322100025025288277@noble.neil.brown.name (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-nfs-owner@kernel.org> Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 From: "NeilBrown" <neilb@suse.de> To: "Chris Mason" <clm@fb.com>, "David Sterba" <dsterba@suse.com>, "Christoph Hellwig" <hch@infradead.org>, "Josef Bacik" <josef@toxicpanda.com>, "J. Bruce Fields" <bfields@fieldses.org>, "Chuck Lever" <chuck.lever@oracle.com> Cc: "Roman Mamedov" <rm@romanrm.net>, "Goffredo Baroncelli" <kreijack@libero.it>, "Alexander Viro" <viro@zeniv.linux.org.uk>, linux-fsdevel@vger.kernel.org, linux-nfs@vger.kernel.org, linux-btrfs@vger.kernel.org Subject: [PATCH v2] BTRFS/NFSD: provide more unique inode number for btrfs export In-reply-to: <162906585094.1695.15815972140753474778@noble.neil.brown.name> References: <162742539595.32498.13687924366155737575.stgit@noble.brown>, <162881913686.1695.12479588032010502384@noble.neil.brown.name>, <bf49ef31-0c86-62c8-7862-719935764036@libero.it>, <20210816003505.7b3e9861@natsu>, <162906585094.1695.15815972140753474778@noble.neil.brown.name> Date: Mon, 23 Aug 2021 14:05:54 +1000 Message-id: <162969155423.9892.18322100025025288277@noble.neil.brown.name> Precedence: bulk
Series	[v2] BTRFS/NFSD: provide more unique inode number for btrfs export | expand [v2] BTRFS/NFSD: provide more unique inode number for btrfs export

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0117d867ecf8..989fdf2032d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9195,6 +9195,10 @@ static int btrfs_getattr(struct user_namespace *mnt_userns, generic_fillattr(&init_user_ns, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; + if (BTRFS_I(inode)->root->root_key.objectid != BTRFS_FS_TREE_OBJECTID) + stat->ino_uniquifier = + swab64(BTRFS_I(inode)->root->root_key.objectid); + spin_lock(&BTRFS_I(inode)->lock); delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes; inode_bytes = inode_get_bytes(inode); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0a5ebc52e6a9..19d14f11f79a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -340,6 +340,7 @@ svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, { struct user_namespace *userns = nfsd_user_namespace(rqstp); __be32 *p; + u64 ino; u64 fsid; p = xdr_reserve_space(xdr, XDR_UNIT * 21); @@ -377,7 +378,8 @@ svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, p = xdr_encode_hyper(p, fsid); /* fileid */ - p = xdr_encode_hyper(p, stat->ino); + ino = nfsd_uniquify_ino(fhp, stat); + p = xdr_encode_hyper(p, ino); p = encode_nfstime3(p, &stat->atime); p = encode_nfstime3(p, &stat->mtime); @@ -1151,6 +1153,17 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, if (xdr_stream_encode_item_present(xdr) < 0) return false; /* fileid */ + if (!resp->dir_have_uniquifier) { + struct kstat stat; + if (fh_getattr(&resp->fh, &stat) == nfs_ok) + resp->dir_ino_uniquifier = + nfsd_ino_uniquifier(&resp->fh, &stat); + else + resp->dir_ino_uniquifier = 0; + resp->dir_have_uniquifier = true; + } + if (resp->dir_ino_uniquifier != ino) + ino ^= resp->dir_ino_uniquifier; if (xdr_stream_encode_u64(xdr, ino) < 0) return false; /* name */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7abeccb975b2..5ed894ceebb0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3114,10 +3114,11 @@ 
nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { + u64 ino = nfsd_uniquify_ino(fhp, &stat); p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - p = xdr_encode_hyper(p, stat.ino); + p = xdr_encode_hyper(p, ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { p = xdr_reserve_space(xdr, 8); @@ -3274,7 +3275,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) - goto out_resource; + goto out_resource; /* * Get parent's attributes if not ignoring crossmount * and this is the root of a cross-mounted filesystem. @@ -3284,7 +3285,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = get_parent_attributes(exp, &parent_stat); if (err) goto out_nfserr; - ino = parent_stat.ino; + ino = nfsd_uniquify_ino(fhp, &parent_stat); } p = xdr_encode_hyper(p, ino); } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c475d2271f9c..e97ed957a379 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -172,7 +172,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (--data_left < 0) return error; - if (fh->fh_auth_type != 0) + if ((fh->fh_options & ~NFSD_FH_OPTION_ALL) != 0) return error; len = key_len(fh->fh_fsid_type) / 4; if (len == 0) @@ -569,6 +569,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct inode * inode = d_inode(dentry); dev_t ex_dev = exp_sb(exp)->s_dev; + u8 options = 0; dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), @@ -585,6 +586,14 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */ fhp->fh_no_wcc = ref_fh ? 
ref_fh->fh_no_wcc : false; + if (ref_fh && ref_fh->fh_export == exp) { + options = ref_fh->fh_handle.fh_options; + } else { + /* Set options as needed */ + if (exp->ex_path.mnt->mnt_sb->s_magic == BTRFS_SUPER_MAGIC) + options |= NFSD_FH_OPTION_INO_UNIQUIFY; + } + if (ref_fh == fhp) fh_put(ref_fh); @@ -615,7 +624,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, } else { fhp->fh_handle.fh_size = key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; + fhp->fh_handle.fh_options = options; mk_fsid(fhp->fh_handle.fh_fsid_type, fhp->fh_handle.fh_fsid, diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 6106697adc04..1144a98c2951 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -84,6 +84,28 @@ enum fsid_source { }; extern enum fsid_source fsid_source(const struct svc_fh *fhp); +enum nfsd_fh_options { + NFSD_FH_OPTION_INO_UNIQUIFY = 1, /* BTRFS only */ + + NFSD_FH_OPTION_ALL = 1 +}; + +static inline u64 nfsd_ino_uniquifier(const struct svc_fh *fhp, + const struct kstat *stat) +{ + if (fhp->fh_handle.fh_options & NFSD_FH_OPTION_INO_UNIQUIFY) + return stat->ino_uniquifier; + return 0; +} + +static inline u64 nfsd_uniquify_ino(const struct svc_fh *fhp, + const struct kstat *stat) +{ + u64 u = nfsd_ino_uniquifier(fhp, stat); + if (u != stat->ino) + return stat->ino ^ u; + return stat->ino; +} /* * This might look a little large to "inline" but in all calls except diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 933008382bbe..d9b6c8314bbb 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -179,6 +179,8 @@ struct nfsd3_readdirres { struct xdr_buf dirlist; struct svc_fh scratch; struct readdir_cd common; + u64 dir_ino_uniquifier; + bool dir_have_uniquifier; unsigned int cookie_offset; struct svc_rqst * rqstp; diff --git a/include/linux/stat.h b/include/linux/stat.h index fff27e603814..0f3f74d302f8 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -46,6 +46,24 @@ struct kstat { struct timespec64 btime; 
/* File creation time */ u64 blocks; u64 mnt_id; + /* + * BTRFS does not provide unique inode numbers within a filesystem, + * depending on a synthetic 'dev' to provide uniqueness. + * NFSd cannot make use of this 'dev' number so clients often see + * duplicate inode numbers. + * For BTRFS, 'ino' is unlikely to use the high bits until the filesystem + * has created a great many inodes. + * It puts another number in ino_uniquifier which: + * - has most entropy in the high bits + * - is different precisely when 'dev' is different + * - is stable across unmount/remount + * NFSd can xor this with 'ino' to get a substantially more unique + * number for reporting to the client. + * The ino_uniquifier for a directory can reasonably be applied + * to inode numbers reported by the readdir filldir callback. + * It is NOT currently exported to user-space. + */ + u64 ino_uniquifier; }; #endif diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 427294dd56a1..59311df4b476 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -38,11 +38,17 @@ struct nfs_fhbase_old { * The file handle starts with a sequence of four-byte words. * The first word contains a version number (1) and three descriptor bytes * that tell how the remaining 3 variable length fields should be handled. - * These three bytes are auth_type, fsid_type and fileid_type. + * These three bytes are options, fsid_type and fileid_type. * * All four-byte values are in host-byte-order. * - * The auth_type field is deprecated and must be set to 0. + * The options field (previously auth_type) can be used when nfsd behaviour + * needs to change in a non-compatible way, usually for some specific + * filesystem. Options should only be set in filehandles for filesystems which + * need them. + * Current values: + * 1 - BTRFS only. Cause stat->ino_uniquifier to be used to improve inode + * number uniqueness. 
* * The fsid_type identifies how the filesystem (or export point) is * encoded. @@ -67,7 +73,7 @@ struct nfs_fhbase_new { union { struct { __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type_aux; + __u8 fb_options_aux; __u8 fb_fsid_type_aux; __u8 fb_fileid_type_aux; __u32 fb_auth[1]; @@ -76,7 +82,7 @@ struct nfs_fhbase_new { }; struct { __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; + __u8 fb_options; __u8 fb_fsid_type; __u8 fb_fileid_type; __u32 fb_auth_flex[]; /* flexible-array member */ @@ -106,11 +112,11 @@ struct knfsd_fh { #define fh_version fh_base.fh_new.fb_version #define fh_fsid_type fh_base.fh_new.fb_fsid_type -#define fh_auth_type fh_base.fh_new.fb_auth_type +#define fh_options fh_base.fh_new.fb_options #define fh_fileid_type fh_base.fh_new.fb_fileid_type #define fh_fsid fh_base.fh_new.fb_auth_flex /* Do not use, provided for userspace compatiblity. */ -#define fh_auth fh_base.fh_new.fb_auth +#define fh_auth fh_base.fh_new.fb_options #endif /* _UAPI_LINUX_NFSD_FH_H */

[v2] BTRFS/NFSD: provide more unique inode number for btrfs export

Commit Message

Patch