diff mbox series

[6/7] fanotify: mix event info into merge key hash

Message ID 20210202162010.305971-7-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series Performance improvement for fanotify merge | expand

Commit Message

Amir Goldstein Feb. 2, 2021, 4:20 p.m. UTC
Improve the balance of hashed queue lists by mixing more event info
relevant for merge.

For example, all FAN_CREATE name events in the same dir used to have the
same merge key based on the dir inode.  With this change the created
file name is mixed into the merge key.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------
 fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++---
 2 files changed, 44 insertions(+), 9 deletions(-)

Comments

Jan Kara Feb. 16, 2021, 3:39 p.m. UTC | #1
On Tue 02-02-21 18:20:09, Amir Goldstein wrote:
> Improve the balance of hashed queue lists by mixing more event info
> relevant for merge.
> 
> For example, all FAN_CREATE name events in the same dir used to have the
> same merge key based on the dir inode.  With this change the created
> file name is mixed into the merge key.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------
>  fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++---
>  2 files changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 6d3807012851..b19fef1c6f64 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
...
> @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
>  
>  	ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
>  	ffe->fsid = *fsid;
> -	fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
> -			   gfp);
> +	fh = &ffe->object_fh;
> +	fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp);
> +
> +	/* Mix fsid+fid info into event merge key */
> +	ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len);

Is it really sensible to hash FID with full_name_hash()? It would make more
sense to treat it as binary data, not strings...

>  	return &ffe->fae;
>  }
> @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
>  	if (file_name)
>  		fanotify_info_copy_name(info, file_name);
>  
> +	/* Mix fsid+dfid+name+fid info into event merge key */
> +	fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info));
> +

Similarly here...

>  	pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
>  		 __func__, id->i_ino, size, dir_fh_len, child_fh_len,
>  		 info->name_len, info->name_len, fanotify_info_name(info));
> @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>  	struct mem_cgroup *old_memcg;
>  	struct inode *child = NULL;
>  	bool name_event = false;
> +	unsigned int hash = 0;
> +	struct pid *pid;
>  
>  	if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
>  		/*
> @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>  	 * Use the victim inode instead of the watching inode as the id for
>  	 * event queue, so event reported on parent is merged with event
>  	 * reported on child when both directory and child watches exist.
> -	 * Reduce object id to 32bit hash for hashed queue merge.
> +	 * Reduce object id and event info to 32bit hash for hashed queue merge.
>  	 */
> -	fanotify_init_event(event, hash_ptr(id, 32), mask);
> +	hash = event->info_hash ^ hash_ptr(id, 32);
>  	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
> -		event->pid = get_pid(task_pid(current));
> +		pid = get_pid(task_pid(current));
>  	else
> -		event->pid = get_pid(task_tgid(current));
> +		pid = get_pid(task_tgid(current));
> +	/* Mix pid info into event merge key */
> +	hash ^= hash_ptr(pid, 32);

hash_32() here?

> +	fanotify_init_event(event, hash, mask);
> +	event->pid = pid;
>  
>  out:
>  	set_active_memcg(old_memcg);
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index 2e856372ffc8..522fb1a68b30 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info)
>  	info->name_len = 0;
>  }
>  
> +static inline unsigned int fanotify_info_len(struct fanotify_info *info)
> +{
> +	return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
> +}
> +
>  static inline void fanotify_info_copy_name(struct fanotify_info *info,
>  					   const struct qstr *name)
>  {
> @@ -138,7 +143,10 @@ enum fanotify_event_type {
>  };
>  
>  struct fanotify_event {
> -	struct fsnotify_event fse;
> +	union {
> +		struct fsnotify_event fse;
> +		unsigned int info_hash;
> +	};
>  	u32 mask;
>  	enum fanotify_event_type type;
>  	struct pid *pid;

How is this ever safe? info_hash will likely overlay with 'list' in
fsnotify_event.

> @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event,
>  
>  struct fanotify_fid_event {
>  	struct fanotify_event fae;
> -	__kernel_fsid_t fsid;
> +	union {
> +		__kernel_fsid_t fsid;
> +		void *fskey;	/* 64 or 32 bits of fsid used for salt */
> +	};
>  	struct fanotify_fh object_fh;
>  	/* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
>  	unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
> @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event)
>  
>  struct fanotify_name_event {
>  	struct fanotify_event fae;
> -	__kernel_fsid_t fsid;
> +	union {
> +		__kernel_fsid_t fsid;
> +		void *fskey;	/* 64 or 32 bits of fsid used for salt */
> +	};
>  	struct fanotify_info info;
>  };

What games are you playing here with the unions? I presume you can remove
these 'fskey' unions and just use (void *)(event->fsid) at appropriate
places? IMO much more comprehensible...

								Honza
Amir Goldstein Feb. 17, 2021, 10:13 a.m. UTC | #2
On Tue, Feb 16, 2021 at 5:39 PM Jan Kara <jack@suse.cz> wrote:
>
> On Tue 02-02-21 18:20:09, Amir Goldstein wrote:
> > Improve the balance of hashed queue lists by mixing more event info
> > relevant for merge.
> >
> > For example, all FAN_CREATE name events in the same dir used to have the
> > same merge key based on the dir inode.  With this change the created
> > file name is mixed into the merge key.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------
> >  fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++---
> >  2 files changed, 44 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > index 6d3807012851..b19fef1c6f64 100644
> > --- a/fs/notify/fanotify/fanotify.c
> > +++ b/fs/notify/fanotify/fanotify.c
> ...
> > @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
> >
> >       ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
> >       ffe->fsid = *fsid;
> > -     fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
> > -                        gfp);
> > +     fh = &ffe->object_fh;
> > +     fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp);
> > +
> > +     /* Mix fsid+fid info into event merge key */
> > +     ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len);
>
> Is it really sensible to hash FID with full_name_hash()? It would make more
> sense to treat it as binary data, not strings...

See the actual implementation of full_name_hash() for 64bit.
it treats the string as an array of ulong, which is quite perfect for FID.

>
> >       return &ffe->fae;
> >  }
> > @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
> >       if (file_name)
> >               fanotify_info_copy_name(info, file_name);
> >
> > +     /* Mix fsid+dfid+name+fid info into event merge key */
> > +     fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info));
> > +
>
> Similarly here...
>
> >       pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
> >                __func__, id->i_ino, size, dir_fh_len, child_fh_len,
> >                info->name_len, info->name_len, fanotify_info_name(info));
> > @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
> >       struct mem_cgroup *old_memcg;
> >       struct inode *child = NULL;
> >       bool name_event = false;
> > +     unsigned int hash = 0;
> > +     struct pid *pid;
> >
> >       if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
> >               /*
> > @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
> >        * Use the victim inode instead of the watching inode as the id for
> >        * event queue, so event reported on parent is merged with event
> >        * reported on child when both directory and child watches exist.
> > -      * Reduce object id to 32bit hash for hashed queue merge.
> > +      * Reduce object id and event info to 32bit hash for hashed queue merge.
> >        */
> > -     fanotify_init_event(event, hash_ptr(id, 32), mask);
> > +     hash = event->info_hash ^ hash_ptr(id, 32);
> >       if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
> > -             event->pid = get_pid(task_pid(current));
> > +             pid = get_pid(task_pid(current));
> >       else
> > -             event->pid = get_pid(task_tgid(current));
> > +             pid = get_pid(task_tgid(current));
> > +     /* Mix pid info into event merge key */
> > +     hash ^= hash_ptr(pid, 32);
>
> hash_32() here?

I don't think so.
hash_32() looses the high bits of ptr before mixing them.
hash_ptr(pid, 32) looses the *low* bits which contain less entropy
after mixing all 64bits of ptr.

>
> > +     fanotify_init_event(event, hash, mask);
> > +     event->pid = pid;
> >
> >  out:
> >       set_active_memcg(old_memcg);
> > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > index 2e856372ffc8..522fb1a68b30 100644
> > --- a/fs/notify/fanotify/fanotify.h
> > +++ b/fs/notify/fanotify/fanotify.h
> > @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info)
> >       info->name_len = 0;
> >  }
> >
> > +static inline unsigned int fanotify_info_len(struct fanotify_info *info)
> > +{
> > +     return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
> > +}
> > +
> >  static inline void fanotify_info_copy_name(struct fanotify_info *info,
> >                                          const struct qstr *name)
> >  {
> > @@ -138,7 +143,10 @@ enum fanotify_event_type {
> >  };
> >
> >  struct fanotify_event {
> > -     struct fsnotify_event fse;
> > +     union {
> > +             struct fsnotify_event fse;
> > +             unsigned int info_hash;
> > +     };
> >       u32 mask;
> >       enum fanotify_event_type type;
> >       struct pid *pid;
>
> How is this ever safe? info_hash will likely overlay with 'list' in
> fsnotify_event.

Oh yeh. That's an ugly hack. Sorry for that.
I wanted to avoid adding an arg unsigned int *info_hash to all
fanotify_alloc_*_event() helpers, so I used this uninitialized space
in event instead.
I'll do it the proper way.

>
> > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event,
> >
> >  struct fanotify_fid_event {
> >       struct fanotify_event fae;
> > -     __kernel_fsid_t fsid;
> > +     union {
> > +             __kernel_fsid_t fsid;
> > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > +     };
> >       struct fanotify_fh object_fh;
> >       /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
> >       unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
> > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event)
> >
> >  struct fanotify_name_event {
> >       struct fanotify_event fae;
> > -     __kernel_fsid_t fsid;
> > +     union {
> > +             __kernel_fsid_t fsid;
> > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > +     };
> >       struct fanotify_info info;
> >  };
>
> What games are you playing here with the unions? I presume you can remove
> these 'fskey' unions and just use (void *)(event->fsid) at appropriate
> places? IMO much more comprehensible...

Ok.

Thanks,
Amir.
Amir Goldstein Feb. 18, 2021, 10:46 a.m. UTC | #3
On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Feb 16, 2021 at 5:39 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Tue 02-02-21 18:20:09, Amir Goldstein wrote:
> > > Improve the balance of hashed queue lists by mixing more event info
> > > relevant for merge.
> > >
> > > For example, all FAN_CREATE name events in the same dir used to have the
> > > same merge key based on the dir inode.  With this change the created
> > > file name is mixed into the merge key.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------
> > >  fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++---
> > >  2 files changed, 44 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > > index 6d3807012851..b19fef1c6f64 100644
> > > --- a/fs/notify/fanotify/fanotify.c
> > > +++ b/fs/notify/fanotify/fanotify.c
> > ...
> > > @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
> > >
> > >       ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
> > >       ffe->fsid = *fsid;
> > > -     fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
> > > -                        gfp);
> > > +     fh = &ffe->object_fh;
> > > +     fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp);
> > > +
> > > +     /* Mix fsid+fid info into event merge key */
> > > +     ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len);
> >
> > Is it really sensible to hash FID with full_name_hash()? It would make more
> > sense to treat it as binary data, not strings...
>
> See the actual implementation of full_name_hash() for 64bit.
> it treats the string as an array of ulong, which is quite perfect for FID.
>
> >
> > >       return &ffe->fae;
> > >  }
> > > @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
> > >       if (file_name)
> > >               fanotify_info_copy_name(info, file_name);
> > >
> > > +     /* Mix fsid+dfid+name+fid info into event merge key */
> > > +     fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info));
> > > +
> >
> > Similarly here...
> >
> > >       pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
> > >                __func__, id->i_ino, size, dir_fh_len, child_fh_len,
> > >                info->name_len, info->name_len, fanotify_info_name(info));
> > > @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
> > >       struct mem_cgroup *old_memcg;
> > >       struct inode *child = NULL;
> > >       bool name_event = false;
> > > +     unsigned int hash = 0;
> > > +     struct pid *pid;
> > >
> > >       if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
> > >               /*
> > > @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
> > >        * Use the victim inode instead of the watching inode as the id for
> > >        * event queue, so event reported on parent is merged with event
> > >        * reported on child when both directory and child watches exist.
> > > -      * Reduce object id to 32bit hash for hashed queue merge.
> > > +      * Reduce object id and event info to 32bit hash for hashed queue merge.
> > >        */
> > > -     fanotify_init_event(event, hash_ptr(id, 32), mask);
> > > +     hash = event->info_hash ^ hash_ptr(id, 32);
> > >       if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
> > > -             event->pid = get_pid(task_pid(current));
> > > +             pid = get_pid(task_pid(current));
> > >       else
> > > -             event->pid = get_pid(task_tgid(current));
> > > +             pid = get_pid(task_tgid(current));
> > > +     /* Mix pid info into event merge key */
> > > +     hash ^= hash_ptr(pid, 32);
> >
> > hash_32() here?
>
> I don't think so.
> hash_32() looses the high bits of ptr before mixing them.
> hash_ptr(pid, 32) looses the *low* bits which contain less entropy
> after mixing all 64bits of ptr.
>
> >
> > > +     fanotify_init_event(event, hash, mask);
> > > +     event->pid = pid;
> > >
> > >  out:
> > >       set_active_memcg(old_memcg);
> > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > > index 2e856372ffc8..522fb1a68b30 100644
> > > --- a/fs/notify/fanotify/fanotify.h
> > > +++ b/fs/notify/fanotify/fanotify.h
> > > @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info)
> > >       info->name_len = 0;
> > >  }
> > >
> > > +static inline unsigned int fanotify_info_len(struct fanotify_info *info)
> > > +{
> > > +     return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
> > > +}
> > > +
> > >  static inline void fanotify_info_copy_name(struct fanotify_info *info,
> > >                                          const struct qstr *name)
> > >  {
> > > @@ -138,7 +143,10 @@ enum fanotify_event_type {
> > >  };
> > >
> > >  struct fanotify_event {
> > > -     struct fsnotify_event fse;
> > > +     union {
> > > +             struct fsnotify_event fse;
> > > +             unsigned int info_hash;
> > > +     };
> > >       u32 mask;
> > >       enum fanotify_event_type type;
> > >       struct pid *pid;
> >
> > How is this ever safe? info_hash will likely overlay with 'list' in
> > fsnotify_event.
>
> Oh yeh. That's an ugly hack. Sorry for that.
> I wanted to avoid adding an arg unsigned int *info_hash to all
> fanotify_alloc_*_event() helpers, so I used this uninitialized space
> in event instead.
> I'll do it the proper way.
>
> >
> > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event,
> > >
> > >  struct fanotify_fid_event {
> > >       struct fanotify_event fae;
> > > -     __kernel_fsid_t fsid;
> > > +     union {
> > > +             __kernel_fsid_t fsid;
> > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > +     };
> > >       struct fanotify_fh object_fh;
> > >       /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
> > >       unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
> > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event)
> > >
> > >  struct fanotify_name_event {
> > >       struct fanotify_event fae;
> > > -     __kernel_fsid_t fsid;
> > > +     union {
> > > +             __kernel_fsid_t fsid;
> > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > +     };
> > >       struct fanotify_info info;
> > >  };
> >
> > What games are you playing here with the unions? I presume you can remove
> > these 'fskey' unions and just use (void *)(event->fsid) at appropriate
> > places? IMO much more comprehensible...
>

FYI, this is what the open coded conversion looks like:

(void *)*(long *)event->fsid.val

Not so comprehensible... but I used to open coded conversion anyway.

Thanks,
Amir.
Jan Kara Feb. 18, 2021, 11:11 a.m. UTC | #4
On Thu 18-02-21 12:46:48, Amir Goldstein wrote:
> On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event,
> > > >
> > > >  struct fanotify_fid_event {
> > > >       struct fanotify_event fae;
> > > > -     __kernel_fsid_t fsid;
> > > > +     union {
> > > > +             __kernel_fsid_t fsid;
> > > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > > +     };
> > > >       struct fanotify_fh object_fh;
> > > >       /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
> > > >       unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
> > > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event)
> > > >
> > > >  struct fanotify_name_event {
> > > >       struct fanotify_event fae;
> > > > -     __kernel_fsid_t fsid;
> > > > +     union {
> > > > +             __kernel_fsid_t fsid;
> > > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > > +     };
> > > >       struct fanotify_info info;
> > > >  };
> > >
> > > What games are you playing here with the unions? I presume you can remove
> > > these 'fskey' unions and just use (void *)(event->fsid) at appropriate
> > > places? IMO much more comprehensible...
> >
> 
> FYI, this is what the open coded conversion looks like:
> 
> (void *)*(long *)event->fsid.val

Not great but at least fairly localized. I'd just note that this doesn't quite
work on 32-bit archs (sizeof(long) != sizeof(__kernel_fsid_t) there). Maybe
we could just use

hash_32(event->fsid.val[0]) ^ hash_32(event->fsid.val[1])

for mixing into the 'key' value and thus avoid all these games?

								Honza
Amir Goldstein Feb. 18, 2021, 12:17 p.m. UTC | #5
On Thu, Feb 18, 2021 at 1:11 PM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 18-02-21 12:46:48, Amir Goldstein wrote:
> > On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event,
> > > > >
> > > > >  struct fanotify_fid_event {
> > > > >       struct fanotify_event fae;
> > > > > -     __kernel_fsid_t fsid;
> > > > > +     union {
> > > > > +             __kernel_fsid_t fsid;
> > > > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > > > +     };
> > > > >       struct fanotify_fh object_fh;
> > > > >       /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
> > > > >       unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
> > > > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event)
> > > > >
> > > > >  struct fanotify_name_event {
> > > > >       struct fanotify_event fae;
> > > > > -     __kernel_fsid_t fsid;
> > > > > +     union {
> > > > > +             __kernel_fsid_t fsid;
> > > > > +             void *fskey;    /* 64 or 32 bits of fsid used for salt */
> > > > > +     };
> > > > >       struct fanotify_info info;
> > > > >  };
> > > >
> > > > What games are you playing here with the unions? I presume you can remove
> > > > these 'fskey' unions and just use (void *)(event->fsid) at appropriate
> > > > places? IMO much more comprehensible...
> > >
> >
> > FYI, this is what the open coded conversion looks like:
> >
> > (void *)*(long *)event->fsid.val
>
> Not great but at least fairly localized. I'd just note that this doesn't quite
> work on 32-bit archs (sizeof(long) != sizeof(__kernel_fsid_t) there). Maybe
> we could just use
>
> hash_32(event->fsid.val[0]) ^ hash_32(event->fsid.val[1])
>
> for mixing into the 'key' value and thus avoid all these games?
>

Makes sense.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 6d3807012851..b19fef1c6f64 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -14,6 +14,7 @@ 
 #include <linux/audit.h>
 #include <linux/sched/mm.h>
 #include <linux/statfs.h>
+#include <linux/stringhash.h>
 
 #include "fanotify.h"
 
@@ -443,6 +444,10 @@  static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
 	pevent->path = *path;
 	path_get(path);
 
+	/* Mix path info into event merge key */
+	pevent->fae.info_hash = hash_ptr(path->dentry, 32) ^
+				hash_ptr(path->mnt, 32);
+
 	return &pevent->fae;
 }
 
@@ -461,6 +466,9 @@  static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
 	pevent->path = *path;
 	path_get(path);
 
+	/* Permission events are not merged */
+	pevent->fae.info_hash = 0;
+
 	return &pevent->fae;
 }
 
@@ -469,6 +477,7 @@  static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
 						       gfp_t gfp)
 {
 	struct fanotify_fid_event *ffe;
+	struct fanotify_fh *fh;
 
 	ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp);
 	if (!ffe)
@@ -476,8 +485,11 @@  static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
 
 	ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
 	ffe->fsid = *fsid;
-	fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
-			   gfp);
+	fh = &ffe->object_fh;
+	fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp);
+
+	/* Mix fsid+fid info into event merge key */
+	ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len);
 
 	return &ffe->fae;
 }
@@ -517,6 +529,9 @@  static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
 	if (file_name)
 		fanotify_info_copy_name(info, file_name);
 
+	/* Mix fsid+dfid+name+fid info into event merge key */
+	fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info));
+
 	pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
 		 __func__, id->i_ino, size, dir_fh_len, child_fh_len,
 		 info->name_len, info->name_len, fanotify_info_name(info));
@@ -539,6 +554,8 @@  static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
 	struct mem_cgroup *old_memcg;
 	struct inode *child = NULL;
 	bool name_event = false;
+	unsigned int hash = 0;
+	struct pid *pid;
 
 	if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
 		/*
@@ -606,13 +623,17 @@  static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
 	 * Use the victim inode instead of the watching inode as the id for
 	 * event queue, so event reported on parent is merged with event
 	 * reported on child when both directory and child watches exist.
-	 * Reduce object id to 32bit hash for hashed queue merge.
+	 * Reduce object id and event info to 32bit hash for hashed queue merge.
 	 */
-	fanotify_init_event(event, hash_ptr(id, 32), mask);
+	hash = event->info_hash ^ hash_ptr(id, 32);
 	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
-		event->pid = get_pid(task_pid(current));
+		pid = get_pid(task_pid(current));
 	else
-		event->pid = get_pid(task_tgid(current));
+		pid = get_pid(task_tgid(current));
+	/* Mix pid info into event merge key */
+	hash ^= hash_ptr(pid, 32);
+	fanotify_init_event(event, hash, mask);
+	event->pid = pid;
 
 out:
 	set_active_memcg(old_memcg);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 2e856372ffc8..522fb1a68b30 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -115,6 +115,11 @@  static inline void fanotify_info_init(struct fanotify_info *info)
 	info->name_len = 0;
 }
 
+static inline unsigned int fanotify_info_len(struct fanotify_info *info)
+{
+	return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
+}
+
 static inline void fanotify_info_copy_name(struct fanotify_info *info,
 					   const struct qstr *name)
 {
@@ -138,7 +143,10 @@  enum fanotify_event_type {
 };
 
 struct fanotify_event {
-	struct fsnotify_event fse;
+	union {
+		struct fsnotify_event fse;
+		unsigned int info_hash;
+	};
 	u32 mask;
 	enum fanotify_event_type type;
 	struct pid *pid;
@@ -154,7 +162,10 @@  static inline void fanotify_init_event(struct fanotify_event *event,
 
 struct fanotify_fid_event {
 	struct fanotify_event fae;
-	__kernel_fsid_t fsid;
+	union {
+		__kernel_fsid_t fsid;
+		void *fskey;	/* 64 or 32 bits of fsid used for salt */
+	};
 	struct fanotify_fh object_fh;
 	/* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
 	unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
@@ -168,7 +179,10 @@  FANOTIFY_FE(struct fanotify_event *event)
 
 struct fanotify_name_event {
 	struct fanotify_event fae;
-	__kernel_fsid_t fsid;
+	union {
+		__kernel_fsid_t fsid;
+		void *fskey;	/* 64 or 32 bits of fsid used for salt */
+	};
 	struct fanotify_info info;
 };