diff mbox series

[v3,20/20] fanotify: no external fh buffer in fanotify_name_event

Message ID 20200708111156.24659-20-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3,01/20] fsnotify: Rearrange fast path to minimise overhead when there is no watcher | expand

Commit Message

Amir Goldstein July 8, 2020, 11:11 a.m. UTC
The fanotify_fh struct has an inline buffer of size 12 which is enough
to store the most common local filesystem file handles (e.g. ext4, xfs).
For file handles that do not fit in the inline buffer (e.g. btrfs), an
external buffer is allocated to store the file handle.

When allocating a variable size fanotify_name_event, there is no point
in allocating also an external fh buffer when file handle does not fit
in the inline buffer.

Check required size for encoding fh, preallocate an event buffer
sufficient to contain both file handle and name and store the name after
the file handle.

At this time, when not reporting name in event, we still allocate
the fixed size fanotify_fid_event and an external buffer for large
file handles, but fanotify_alloc_name_event() has already been prepared
to accept a NULL file_name.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fanotify/fanotify.c | 68 +++++++++++++++++++++++++----------
 fs/notify/fanotify/fanotify.h | 12 ++++---
 2 files changed, 57 insertions(+), 23 deletions(-)

Comments

Jan Kara July 15, 2020, 3:34 p.m. UTC | #1
On Wed 08-07-20 14:11:55, Amir Goldstein wrote:
> The fanotify_fh struct has an inline buffer of size 12 which is enough
> to store the most common local filesystem file handles (e.g. ext4, xfs).
> For file handles that do not fit in the inline buffer (e.g. btrfs), an
> external buffer is allocated to store the file handle.
> 
> When allocating a variable size fanotify_name_event, there is no point
> in allocating also an external fh buffer when file handle does not fit
> in the inline buffer.
> 
> Check required size for encoding fh, preallocate an event buffer
> sufficient to contain both file handle and name and store the name after
> the file handle.
> 
> At this time, when not reporting name in event, we still allocate
> the fixed size fanotify_fid_event and an external buffer for large
> file handles, but fanotify_alloc_name_event() has already been prepared
> to accept a NULL file_name.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>

Just one tiny nit below:

> @@ -305,27 +323,34 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
>   * Return 0 on failure to encode.
>   */
>  static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
> -			      gfp_t gfp)
> +			      unsigned int fh_len, gfp_t gfp)
>  {
> -	int dwords, type, bytes = 0;
> +	int dwords, bytes, type = 0;
>  	char *ext_buf = NULL;
>  	void *buf = fh->buf;
>  	int err;
>  
>  	fh->type = FILEID_ROOT;
>  	fh->len = 0;
> +	fh->flags = 0;
>  	if (!inode)
>  		return 0;
>  
> -	dwords = 0;
> +	/*
> +	 * !gfp means preallocated variable size fh, but fh_len could
> +	 * be zero in that case if encoding fh len failed.
> +	 */
>  	err = -ENOENT;
> -	type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
> -	if (!dwords)
> +	if (!gfp)
> +		bytes = fh_len;
> +	else
> +		bytes = fanotify_encode_fh_len(inode);

Any reason why proper fh len is not passed in by both callers? We could
then get rid of this 'if' and 'bytes' variable.

								Honza
Amir Goldstein July 15, 2020, 4:05 p.m. UTC | #2
On Wed, Jul 15, 2020 at 6:34 PM Jan Kara <jack@suse.cz> wrote:
>
> On Wed 08-07-20 14:11:55, Amir Goldstein wrote:
> > The fanotify_fh struct has an inline buffer of size 12 which is enough
> > to store the most common local filesystem file handles (e.g. ext4, xfs).
> > For file handles that do not fit in the inline buffer (e.g. btrfs), an
> > external buffer is allocated to store the file handle.
> >
> > When allocating a variable size fanotify_name_event, there is no point
> > in allocating also an external fh buffer when file handle does not fit
> > in the inline buffer.
> >
> > Check required size for encoding fh, preallocate an event buffer
> > sufficient to contain both file handle and name and store the name after
> > the file handle.
> >
> > At this time, when not reporting name in event, we still allocate
> > the fixed size fanotify_fid_event and an external buffer for large
> > file handles, but fanotify_alloc_name_event() has already been prepared
> > to accept a NULL file_name.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>
> Just one tiny nit below:
>
> > @@ -305,27 +323,34 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
> >   * Return 0 on failure to encode.
> >   */
> >  static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
> > -                           gfp_t gfp)
> > +                           unsigned int fh_len, gfp_t gfp)
> >  {
> > -     int dwords, type, bytes = 0;
> > +     int dwords, bytes, type = 0;
> >       char *ext_buf = NULL;
> >       void *buf = fh->buf;
> >       int err;
> >
> >       fh->type = FILEID_ROOT;
> >       fh->len = 0;
> > +     fh->flags = 0;
> >       if (!inode)
> >               return 0;
> >
> > -     dwords = 0;
> > +     /*
> > +      * !gfp means preallocated variable size fh, but fh_len could
> > +      * be zero in that case if encoding fh len failed.
> > +      */
> >       err = -ENOENT;
> > -     type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
> > -     if (!dwords)
> > +     if (!gfp)
> > +             bytes = fh_len;
> > +     else
> > +             bytes = fanotify_encode_fh_len(inode);
>
> Any reason why proper fh len is not passed in by both callers?

No good reason.
It's just how the function evolved and I missed this simplification.

> We could then get rid of this 'if' and 'bytes' variable.

Yap. sounds good.
I will test and push the branches.
Let me know if you want me to re-post anything.

Thanks,
Amir.
Amir Goldstein July 15, 2020, 4:22 p.m. UTC | #3
> > Any reason why proper fh len is not passed in by both callers?
>
> No good reason.
> It's just how the function evolved and I missed this simplification.
>
> > We could then get rid of this 'if' and 'bytes' variable.
>
> Yap. sounds good.
> I will test and push the branches.
> Let me know if you want me to re-post anything.
>

Pushed this nit fix to prep series branch fanotify_prep
and complete tested series to branch fanotify_name_fid.

Thanks,
Amir.
Jan Kara July 15, 2020, 4:24 p.m. UTC | #4
On Wed 15-07-20 19:05:52, Amir Goldstein wrote:
> On Wed, Jul 15, 2020 at 6:34 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Wed 08-07-20 14:11:55, Amir Goldstein wrote:
> > > The fanotify_fh struct has an inline buffer of size 12 which is enough
> > > to store the most common local filesystem file handles (e.g. ext4, xfs).
> > > For file handles that do not fit in the inline buffer (e.g. btrfs), an
> > > external buffer is allocated to store the file handle.
> > >
> > > When allocating a variable size fanotify_name_event, there is no point
> > > in allocating also an external fh buffer when file handle does not fit
> > > in the inline buffer.
> > >
> > > Check required size for encoding fh, preallocate an event buffer
> > > sufficient to contain both file handle and name and store the name after
> > > the file handle.
> > >
> > > At this time, when not reporting name in event, we still allocate
> > > the fixed size fanotify_fid_event and an external buffer for large
> > > file handles, but fanotify_alloc_name_event() has already been prepared
> > > to accept a NULL file_name.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> >
> > Just one tiny nit below:
> >
> > > @@ -305,27 +323,34 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
> > >   * Return 0 on failure to encode.
> > >   */
> > >  static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
> > > -                           gfp_t gfp)
> > > +                           unsigned int fh_len, gfp_t gfp)
> > >  {
> > > -     int dwords, type, bytes = 0;
> > > +     int dwords, bytes, type = 0;
> > >       char *ext_buf = NULL;
> > >       void *buf = fh->buf;
> > >       int err;
> > >
> > >       fh->type = FILEID_ROOT;
> > >       fh->len = 0;
> > > +     fh->flags = 0;
> > >       if (!inode)
> > >               return 0;
> > >
> > > -     dwords = 0;
> > > +     /*
> > > +      * !gfp means preallocated variable size fh, but fh_len could
> > > +      * be zero in that case if encoding fh len failed.
> > > +      */
> > >       err = -ENOENT;
> > > -     type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
> > > -     if (!dwords)
> > > +     if (!gfp)
> > > +             bytes = fh_len;
> > > +     else
> > > +             bytes = fanotify_encode_fh_len(inode);
> >
> > Any reason why proper fh len is not passed in by both callers?
> 
> No good reason.
> It's just how the function evolved and I missed this simplification.
> 
> > We could then get rid of this 'if' and 'bytes' variable.
> 
> Yap. sounds good.
> I will test and push the branches.
> Let me know if you want me to re-post anything.

So I've just picked up patches 1-9 (I took patches 8 and 9 from your git)
and 17 to my fsnotify branch because they are completely stand-alone
cleanups and I didn't see a reason to delay them further. All the other
patches in this series look fine to me but I didn't pick them up yet
because they are more tightly related to the name event series and could
possibly change. So I'll pick them up once I feel name event series is more
stable...

								Honza
Amir Goldstein July 15, 2020, 5:44 p.m. UTC | #5
> > Yap. sounds good.
> > I will test and push the branches.
> > Let me know if you want me to re-post anything.
>
> So I've just picked up patches 1-9 (I took patches 8 and 9 from your git)
> and 17 to my fsnotify branch because they are completely stand-alone
> cleanups and I didn't see a reason to delay them further. All the other
> patches in this series look fine to me but I didn't pick them up yet
> because they are more tightly related to the name event series and could
> possibly change. So I'll pick them up once I feel name event series is more
> stable...
>

Fair enough.
I rebased on top of your branch tested and pushed
fanotify_prep/fanotify_name_fid.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 4b0bc4afe6ff..4833d4c88122 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -298,6 +298,24 @@  static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 	return test_mask & user_mask;
 }
 
+/*
+ * Check size needed to encode fanotify_fh.
+ *
+ * Return size of encoded fh without fanotify_fh header.
+ * Return 0 on failure to encode.
+ */
+static int fanotify_encode_fh_len(struct inode *inode)
+{
+	int dwords = 0;
+
+	if (!inode)
+		return 0;
+
+	exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
+
+	return dwords << 2;
+}
+
 /*
  * Encode fanotify_fh.
  *
@@ -305,27 +323,34 @@  static u32 fanotify_group_event_mask(struct fsnotify_group *group,
  * Return 0 on failure to encode.
  */
 static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
-			      gfp_t gfp)
+			      unsigned int fh_len, gfp_t gfp)
 {
-	int dwords, type, bytes = 0;
+	int dwords, bytes, type = 0;
 	char *ext_buf = NULL;
 	void *buf = fh->buf;
 	int err;
 
 	fh->type = FILEID_ROOT;
 	fh->len = 0;
+	fh->flags = 0;
 	if (!inode)
 		return 0;
 
-	dwords = 0;
+	/*
+	 * !gfp means preallocated variable size fh, but fh_len could
+	 * be zero in that case if encoding fh len failed.
+	 */
 	err = -ENOENT;
-	type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
-	if (!dwords)
+	if (!gfp)
+		bytes = fh_len;
+	else
+		bytes = fanotify_encode_fh_len(inode);
+	if (bytes < 4 || WARN_ON_ONCE(bytes % 4))
 		goto out_err;
 
-	bytes = dwords << 2;
-	if (bytes > FANOTIFY_INLINE_FH_LEN) {
-		/* Treat failure to allocate fh as failure to allocate event */
+	/* No external buffer in a variable size allocated fh */
+	if (gfp && bytes > FANOTIFY_INLINE_FH_LEN) {
+		/* Treat failure to allocate fh as failure to encode fh */
 		err = -ENOMEM;
 		ext_buf = kmalloc(bytes, gfp);
 		if (!ext_buf)
@@ -333,8 +358,10 @@  static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
 
 		*fanotify_fh_ext_buf_ptr(fh) = ext_buf;
 		buf = ext_buf;
+		fh->flags |= FANOTIFY_FH_FLAG_EXT_BUF;
 	}
 
+	dwords = bytes >> 2;
 	type = exportfs_encode_inode_fh(inode, buf, &dwords, NULL);
 	err = -EINVAL;
 	if (!type || type == FILEID_INVALID || bytes != dwords << 2)
@@ -419,7 +446,7 @@  struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
 
 	ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
 	ffe->fsid = *fsid;
-	fanotify_encode_fh(&ffe->object_fh, id, gfp);
+	fanotify_encode_fh(&ffe->object_fh, id, 0, gfp);
 
 	return &ffe->fae;
 }
@@ -432,8 +459,13 @@  struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
 	struct fanotify_name_event *fne;
 	struct fanotify_info *info;
 	struct fanotify_fh *dfh;
+	unsigned int dir_fh_len = fanotify_encode_fh_len(id);
+	unsigned int size;
 
-	fne = kmalloc(sizeof(*fne) + file_name->len + 1, gfp);
+	size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
+	if (file_name)
+		size += file_name->len + 1;
+	fne = kmalloc(size, gfp);
 	if (!fne)
 		return NULL;
 
@@ -442,8 +474,13 @@  struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
 	info = &fne->info;
 	fanotify_info_init(info);
 	dfh = fanotify_info_dir_fh(info);
-	info->dir_fh_totlen = fanotify_encode_fh(dfh, id, gfp);
-	fanotify_info_copy_name(info, file_name);
+	info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0);
+	if (file_name)
+		fanotify_info_copy_name(info, file_name);
+
+	pr_debug("%s: ino=%lu size=%u dir_fh_len=%u name_len=%u name='%.*s'\n",
+		 __func__, id->i_ino, size, dir_fh_len,
+		 info->name_len, info->name_len, fanotify_info_name(info));
 
 	return &fne->fae;
 }
@@ -658,12 +695,7 @@  static void fanotify_free_fid_event(struct fanotify_event *event)
 
 static void fanotify_free_name_event(struct fanotify_event *event)
 {
-	struct fanotify_name_event *fne = FANOTIFY_NE(event);
-	struct fanotify_fh *dfh = fanotify_info_dir_fh(&fne->info);
-
-	if (fanotify_fh_has_ext_buf(dfh))
-		kfree(fanotify_fh_ext_buf(dfh));
-	kfree(fne);
+	kfree(FANOTIFY_NE(event));
 }
 
 static void fanotify_free_event(struct fsnotify_event *fsn_event)
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 5e104fc56abb..12c204b1489f 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -29,8 +29,10 @@  enum {
 struct fanotify_fh {
 	u8 type;
 	u8 len;
-	u8 pad[2];
-	unsigned char buf[FANOTIFY_INLINE_FH_LEN];
+#define FANOTIFY_FH_FLAG_EXT_BUF 1
+	u8 flags;
+	u8 pad;
+	unsigned char buf[];
 } __aligned(4);
 
 /* Variable size struct for dir file handle + child file handle + name */
@@ -50,7 +52,7 @@  struct fanotify_info {
 
 static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh)
 {
-	return fh->len > FANOTIFY_INLINE_FH_LEN;
+	return (fh->flags & FANOTIFY_FH_FLAG_EXT_BUF);
 }
 
 static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh)
@@ -154,6 +156,8 @@  struct fanotify_fid_event {
 	struct fanotify_event fae;
 	__kernel_fsid_t fsid;
 	struct fanotify_fh object_fh;
+	/* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
+	unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
 };
 
 static inline struct fanotify_fid_event *
@@ -166,8 +170,6 @@  struct fanotify_name_event {
 	struct fanotify_event fae;
 	__kernel_fsid_t fsid;
 	struct fanotify_info info;
-	/* Reserve space in info.buf[] - access with fanotify_info_dir_fh() */
-	struct fanotify_fh _dir_fh;
 };
 
 static inline struct fanotify_name_event *