Message ID | 20210202162010.305971-7-amir73il@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Performance improvement for fanotify merge | expand |
On Tue 02-02-21 18:20:09, Amir Goldstein wrote: > Improve the balance of hashed queue lists by mixing more event info > relevant for merge. > > For example, all FAN_CREATE name events in the same dir used to have the > same merge key based on the dir inode. With this change the created > file name is mixed into the merge key. > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------ > fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++--- > 2 files changed, 44 insertions(+), 9 deletions(-) > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index 6d3807012851..b19fef1c6f64 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c ... > @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, > > ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; > ffe->fsid = *fsid; > - fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), > - gfp); > + fh = &ffe->object_fh; > + fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp); > + > + /* Mix fsid+fid info into event merge key */ > + ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len); Is it really sensible to hash FID with full_name_hash()? It would make more sense to treat it as binary data, not strings... > return &ffe->fae; > } > @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, > if (file_name) > fanotify_info_copy_name(info, file_name); > > + /* Mix fsid+dfid+name+fid info into event merge key */ > + fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info)); > + Similarly here... > pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", > __func__, id->i_ino, size, dir_fh_len, child_fh_len, > info->name_len, info->name_len, fanotify_info_name(info)); > @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > struct mem_cgroup *old_memcg; > struct inode *child = NULL; > bool name_event = false; > + unsigned int hash = 0; > + struct pid *pid; > > if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { > /* > @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > * Use the victim inode instead of the watching inode as the id for > * event queue, so event reported on parent is merged with event > * reported on child when both directory and child watches exist. > - * Reduce object id to 32bit hash for hashed queue merge. > + * Reduce object id and event info to 32bit hash for hashed queue merge. > */ > - fanotify_init_event(event, hash_ptr(id, 32), mask); > + hash = event->info_hash ^ hash_ptr(id, 32); > if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) > - event->pid = get_pid(task_pid(current)); > + pid = get_pid(task_pid(current)); > else > - event->pid = get_pid(task_tgid(current)); > + pid = get_pid(task_tgid(current)); > + /* Mix pid info into event merge key */ > + hash ^= hash_ptr(pid, 32); hash_32() here? > + fanotify_init_event(event, hash, mask); > + event->pid = pid; > > out: > set_active_memcg(old_memcg); > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > index 2e856372ffc8..522fb1a68b30 100644 > --- a/fs/notify/fanotify/fanotify.h > +++ b/fs/notify/fanotify/fanotify.h > @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info) > info->name_len = 0; > } > > +static inline unsigned int fanotify_info_len(struct fanotify_info *info) > +{ > + return info->dir_fh_totlen + info->file_fh_totlen + info->name_len; > +} > + > static inline void fanotify_info_copy_name(struct fanotify_info *info, > const struct qstr *name) > { > @@ -138,7 +143,10 @@ enum fanotify_event_type { > }; > > struct fanotify_event { > - struct fsnotify_event fse; > + union { > + struct fsnotify_event fse; > + unsigned int info_hash; > + }; > u32 mask; > enum fanotify_event_type type; > struct pid *pid; How is this ever safe? info_hash will likely overlay with 'list' in fsnotify_event. > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, > > struct fanotify_fid_event { > struct fanotify_event fae; > - __kernel_fsid_t fsid; > + union { > + __kernel_fsid_t fsid; > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > + }; > struct fanotify_fh object_fh; > /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ > unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) > > struct fanotify_name_event { > struct fanotify_event fae; > - __kernel_fsid_t fsid; > + union { > + __kernel_fsid_t fsid; > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > + }; > struct fanotify_info info; > }; What games are you playing here with the unions? I presume you can remove these 'fskey' unions and just use (void *)(event->fsid) at appropriate places? IMO much more comprehensible... Honza
On Tue, Feb 16, 2021 at 5:39 PM Jan Kara <jack@suse.cz> wrote: > > On Tue 02-02-21 18:20:09, Amir Goldstein wrote: > > Improve the balance of hashed queue lists by mixing more event info > > relevant for merge. > > > > For example, all FAN_CREATE name events in the same dir used to have the > > same merge key based on the dir inode. With this change the created > > file name is mixed into the merge key. > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > --- > > fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------ > > fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++--- > > 2 files changed, 44 insertions(+), 9 deletions(-) > > > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > > index 6d3807012851..b19fef1c6f64 100644 > > --- a/fs/notify/fanotify/fanotify.c > > +++ b/fs/notify/fanotify/fanotify.c > ... > > @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, > > > > ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; > > ffe->fsid = *fsid; > > - fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), > > - gfp); > > + fh = &ffe->object_fh; > > + fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp); > > + > > + /* Mix fsid+fid info into event merge key */ > > + ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len); > > Is it really sensible to hash FID with full_name_hash()? It would make more > sense to treat it as binary data, not strings... See the actual implementation of full_name_hash() for 64bit. it treats the string as an array of ulong, which is quite perfect for FID. > > > return &ffe->fae; > > } > > @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, > > if (file_name) > > fanotify_info_copy_name(info, file_name); > > > > + /* Mix fsid+dfid+name+fid info into event merge key */ > > + fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info)); > > + > > Similarly here... > > > pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", > > __func__, id->i_ino, size, dir_fh_len, child_fh_len, > > info->name_len, info->name_len, fanotify_info_name(info)); > > @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > > struct mem_cgroup *old_memcg; > > struct inode *child = NULL; > > bool name_event = false; > > + unsigned int hash = 0; > > + struct pid *pid; > > > > if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { > > /* > > @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > > * Use the victim inode instead of the watching inode as the id for > > * event queue, so event reported on parent is merged with event > > * reported on child when both directory and child watches exist. > > - * Reduce object id to 32bit hash for hashed queue merge. > > + * Reduce object id and event info to 32bit hash for hashed queue merge. > > */ > > - fanotify_init_event(event, hash_ptr(id, 32), mask); > > + hash = event->info_hash ^ hash_ptr(id, 32); > > if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) > > - event->pid = get_pid(task_pid(current)); > > + pid = get_pid(task_pid(current)); > > else > > - event->pid = get_pid(task_tgid(current)); > > + pid = get_pid(task_tgid(current)); > > + /* Mix pid info into event merge key */ > > + hash ^= hash_ptr(pid, 32); > > hash_32() here? I don't think so. hash_32() looses the high bits of ptr before mixing them. hash_ptr(pid, 32) looses the *low* bits which contain less entropy after mixing all 64bits of ptr. > > > + fanotify_init_event(event, hash, mask); > > + event->pid = pid; > > > > out: > > set_active_memcg(old_memcg); > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > > index 2e856372ffc8..522fb1a68b30 100644 > > --- a/fs/notify/fanotify/fanotify.h > > +++ b/fs/notify/fanotify/fanotify.h > > @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info) > > info->name_len = 0; > > } > > > > +static inline unsigned int fanotify_info_len(struct fanotify_info *info) > > +{ > > + return info->dir_fh_totlen + info->file_fh_totlen + info->name_len; > > +} > > + > > static inline void fanotify_info_copy_name(struct fanotify_info *info, > > const struct qstr *name) > > { > > @@ -138,7 +143,10 @@ enum fanotify_event_type { > > }; > > > > struct fanotify_event { > > - struct fsnotify_event fse; > > + union { > > + struct fsnotify_event fse; > > + unsigned int info_hash; > > + }; > > u32 mask; > > enum fanotify_event_type type; > > struct pid *pid; > > How is this ever safe? info_hash will likely overlay with 'list' in > fsnotify_event. Oh yeh. That's an ugly hack. Sorry for that. I wanted to avoid adding an arg unsigned int *info_hash to all fanotify_alloc_*_event() helpers, so I used this uninitialized space in event instead. I'll do it the proper way. > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, > > > > struct fanotify_fid_event { > > struct fanotify_event fae; > > - __kernel_fsid_t fsid; > > + union { > > + __kernel_fsid_t fsid; > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > + }; > > struct fanotify_fh object_fh; > > /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ > > unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) > > > > struct fanotify_name_event { > > struct fanotify_event fae; > > - __kernel_fsid_t fsid; > > + union { > > + __kernel_fsid_t fsid; > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > + }; > > struct fanotify_info info; > > }; > > What games are you playing here with the unions? I presume you can remove > these 'fskey' unions and just use (void *)(event->fsid) at appropriate > places? IMO much more comprehensible... Ok. Thanks, Amir.
On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote: > > On Tue, Feb 16, 2021 at 5:39 PM Jan Kara <jack@suse.cz> wrote: > > > > On Tue 02-02-21 18:20:09, Amir Goldstein wrote: > > > Improve the balance of hashed queue lists by mixing more event info > > > relevant for merge. > > > > > > For example, all FAN_CREATE name events in the same dir used to have the > > > same merge key based on the dir inode. With this change the created > > > file name is mixed into the merge key. > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > --- > > > fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------ > > > fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++--- > > > 2 files changed, 44 insertions(+), 9 deletions(-) > > > > > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > > > index 6d3807012851..b19fef1c6f64 100644 > > > --- a/fs/notify/fanotify/fanotify.c > > > +++ b/fs/notify/fanotify/fanotify.c > > ... > > > @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, > > > > > > ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; > > > ffe->fsid = *fsid; > > > - fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), > > > - gfp); > > > + fh = &ffe->object_fh; > > > + fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp); > > > + > > > + /* Mix fsid+fid info into event merge key */ > > > + ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len); > > > > Is it really sensible to hash FID with full_name_hash()? It would make more > > sense to treat it as binary data, not strings... > > See the actual implementation of full_name_hash() for 64bit. > it treats the string as an array of ulong, which is quite perfect for FID. > > > > > > return &ffe->fae; > > > } > > > @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, > > > if (file_name) > > > fanotify_info_copy_name(info, file_name); > > > > > > + /* Mix fsid+dfid+name+fid info into event merge key */ > > > + fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info)); > > > + > > > > Similarly here... > > > > > pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", > > > __func__, id->i_ino, size, dir_fh_len, child_fh_len, > > > info->name_len, info->name_len, fanotify_info_name(info)); > > > @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > > > struct mem_cgroup *old_memcg; > > > struct inode *child = NULL; > > > bool name_event = false; > > > + unsigned int hash = 0; > > > + struct pid *pid; > > > > > > if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { > > > /* > > > @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > > > * Use the victim inode instead of the watching inode as the id for > > > * event queue, so event reported on parent is merged with event > > > * reported on child when both directory and child watches exist. > > > - * Reduce object id to 32bit hash for hashed queue merge. > > > + * Reduce object id and event info to 32bit hash for hashed queue merge. > > > */ > > > - fanotify_init_event(event, hash_ptr(id, 32), mask); > > > + hash = event->info_hash ^ hash_ptr(id, 32); > > > if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) > > > - event->pid = get_pid(task_pid(current)); > > > + pid = get_pid(task_pid(current)); > > > else > > > - event->pid = get_pid(task_tgid(current)); > > > + pid = get_pid(task_tgid(current)); > > > + /* Mix pid info into event merge key */ > > > + hash ^= hash_ptr(pid, 32); > > > > hash_32() here? > > I don't think so. > hash_32() looses the high bits of ptr before mixing them. > hash_ptr(pid, 32) looses the *low* bits which contain less entropy > after mixing all 64bits of ptr. > > > > > > + fanotify_init_event(event, hash, mask); > > > + event->pid = pid; > > > > > > out: > > > set_active_memcg(old_memcg); > > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > > > index 2e856372ffc8..522fb1a68b30 100644 > > > --- a/fs/notify/fanotify/fanotify.h > > > +++ b/fs/notify/fanotify/fanotify.h > > > @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info) > > > info->name_len = 0; > > > } > > > > > > +static inline unsigned int fanotify_info_len(struct fanotify_info *info) > > > +{ > > > + return info->dir_fh_totlen + info->file_fh_totlen + info->name_len; > > > +} > > > + > > > static inline void fanotify_info_copy_name(struct fanotify_info *info, > > > const struct qstr *name) > > > { > > > @@ -138,7 +143,10 @@ enum fanotify_event_type { > > > }; > > > > > > struct fanotify_event { > > > - struct fsnotify_event fse; > > > + union { > > > + struct fsnotify_event fse; > > > + unsigned int info_hash; > > > + }; > > > u32 mask; > > > enum fanotify_event_type type; > > > struct pid *pid; > > > > How is this ever safe? info_hash will likely overlay with 'list' in > > fsnotify_event. > > Oh yeh. That's an ugly hack. Sorry for that. > I wanted to avoid adding an arg unsigned int *info_hash to all > fanotify_alloc_*_event() helpers, so I used this uninitialized space > in event instead. > I'll do it the proper way. > > > > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, > > > > > > struct fanotify_fid_event { > > > struct fanotify_event fae; > > > - __kernel_fsid_t fsid; > > > + union { > > > + __kernel_fsid_t fsid; > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > + }; > > > struct fanotify_fh object_fh; > > > /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ > > > unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; > > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) > > > > > > struct fanotify_name_event { > > > struct fanotify_event fae; > > > - __kernel_fsid_t fsid; > > > + union { > > > + __kernel_fsid_t fsid; > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > + }; > > > struct fanotify_info info; > > > }; > > > > What games are you playing here with the unions? I presume you can remove > > these 'fskey' unions and just use (void *)(event->fsid) at appropriate > > places? IMO much more comprehensible... > FYI, this is what the open coded conversion looks like: (void *)*(long *)event->fsid.val Not so comprehensible... but I used to open coded conversion anyway. Thanks, Amir.
On Thu 18-02-21 12:46:48, Amir Goldstein wrote: > On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote: > > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, > > > > > > > > struct fanotify_fid_event { > > > > struct fanotify_event fae; > > > > - __kernel_fsid_t fsid; > > > > + union { > > > > + __kernel_fsid_t fsid; > > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > > + }; > > > > struct fanotify_fh object_fh; > > > > /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ > > > > unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; > > > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) > > > > > > > > struct fanotify_name_event { > > > > struct fanotify_event fae; > > > > - __kernel_fsid_t fsid; > > > > + union { > > > > + __kernel_fsid_t fsid; > > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > > + }; > > > > struct fanotify_info info; > > > > }; > > > > > > What games are you playing here with the unions? I presume you can remove > > > these 'fskey' unions and just use (void *)(event->fsid) at appropriate > > > places? IMO much more comprehensible... > > > > FYI, this is what the open coded conversion looks like: > > (void *)*(long *)event->fsid.val Not great but at least fairly localized. I'd just note that this doesn't quite work on 32-bit archs (sizeof(long) != sizeof(__kernel_fsid_t) there). Maybe we could just use hash_32(event->fsid.val[0]) ^ hash_32(event->fsid.val[1]) for mixing into the 'key' value and thus avoid all these games? Honza
On Thu, Feb 18, 2021 at 1:11 PM Jan Kara <jack@suse.cz> wrote: > > On Thu 18-02-21 12:46:48, Amir Goldstein wrote: > > On Wed, Feb 17, 2021 at 12:13 PM Amir Goldstein <amir73il@gmail.com> wrote: > > > > > @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, > > > > > > > > > > struct fanotify_fid_event { > > > > > struct fanotify_event fae; > > > > > - __kernel_fsid_t fsid; > > > > > + union { > > > > > + __kernel_fsid_t fsid; > > > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > > > + }; > > > > > struct fanotify_fh object_fh; > > > > > /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ > > > > > unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; > > > > > @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) > > > > > > > > > > struct fanotify_name_event { > > > > > struct fanotify_event fae; > > > > > - __kernel_fsid_t fsid; > > > > > + union { > > > > > + __kernel_fsid_t fsid; > > > > > + void *fskey; /* 64 or 32 bits of fsid used for salt */ > > > > > + }; > > > > > struct fanotify_info info; > > > > > }; > > > > > > > > What games are you playing here with the unions? I presume you can remove > > > > these 'fskey' unions and just use (void *)(event->fsid) at appropriate > > > > places? IMO much more comprehensible... > > > > > > > FYI, this is what the open coded conversion looks like: > > > > (void *)*(long *)event->fsid.val > > Not great but at least fairly localized. I'd just note that this doesn't quite > work on 32-bit archs (sizeof(long) != sizeof(__kernel_fsid_t) there). Maybe > we could just use > > hash_32(event->fsid.val[0]) ^ hash_32(event->fsid.val[1]) > > for mixing into the 'key' value and thus avoid all these games? > Makes sense. Thanks, Amir.
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6d3807012851..b19fef1c6f64 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -14,6 +14,7 @@ #include <linux/audit.h> #include <linux/sched/mm.h> #include <linux/statfs.h> +#include <linux/stringhash.h> #include "fanotify.h" @@ -443,6 +444,10 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, pevent->path = *path; path_get(path); + /* Mix path info into event merge key */ + pevent->fae.info_hash = hash_ptr(path->dentry, 32) ^ + hash_ptr(path->mnt, 32); + return &pevent->fae; } @@ -461,6 +466,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, pevent->path = *path; path_get(path); + /* Permission events are not merged */ + pevent->fae.info_hash = 0; + return &pevent->fae; } @@ -469,6 +477,7 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, gfp_t gfp) { struct fanotify_fid_event *ffe; + struct fanotify_fh *fh; ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp); if (!ffe) @@ -476,8 +485,11 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; ffe->fsid = *fsid; - fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), - gfp); + fh = &ffe->object_fh; + fanotify_encode_fh(fh, id, fanotify_encode_fh_len(id), gfp); + + /* Mix fsid+fid info into event merge key */ + ffe->fae.info_hash = full_name_hash(ffe->fskey, fanotify_fh_buf(fh), fh->len); return &ffe->fae; } @@ -517,6 +529,9 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, if (file_name) fanotify_info_copy_name(info, file_name); + /* Mix fsid+dfid+name+fid info into event merge key */ + fne->fae.info_hash = full_name_hash(fne->fskey, info->buf, fanotify_info_len(info)); + pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", __func__, id->i_ino, size, dir_fh_len, child_fh_len, info->name_len, info->name_len, fanotify_info_name(info)); @@ -539,6 +554,8 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct mem_cgroup *old_memcg; struct inode *child = NULL; bool name_event = false; + unsigned int hash = 0; + struct pid *pid; if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* @@ -606,13 +623,17 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, * Use the victim inode instead of the watching inode as the id for * event queue, so event reported on parent is merged with event * reported on child when both directory and child watches exist. - * Reduce object id to 32bit hash for hashed queue merge. + * Reduce object id and event info to 32bit hash for hashed queue merge. */ - fanotify_init_event(event, hash_ptr(id, 32), mask); + hash = event->info_hash ^ hash_ptr(id, 32); if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) - event->pid = get_pid(task_pid(current)); + pid = get_pid(task_pid(current)); else - event->pid = get_pid(task_tgid(current)); + pid = get_pid(task_tgid(current)); + /* Mix pid info into event merge key */ + hash ^= hash_ptr(pid, 32); + fanotify_init_event(event, hash, mask); + event->pid = pid; out: set_active_memcg(old_memcg); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 2e856372ffc8..522fb1a68b30 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -115,6 +115,11 @@ static inline void fanotify_info_init(struct fanotify_info *info) info->name_len = 0; } +static inline unsigned int fanotify_info_len(struct fanotify_info *info) +{ + return info->dir_fh_totlen + info->file_fh_totlen + info->name_len; +} + static inline void fanotify_info_copy_name(struct fanotify_info *info, const struct qstr *name) { @@ -138,7 +143,10 @@ enum fanotify_event_type { }; struct fanotify_event { - struct fsnotify_event fse; + union { + struct fsnotify_event fse; + unsigned int info_hash; + }; u32 mask; enum fanotify_event_type type; struct pid *pid; @@ -154,7 +162,10 @@ static inline void fanotify_init_event(struct fanotify_event *event, struct fanotify_fid_event { struct fanotify_event fae; - __kernel_fsid_t fsid; + union { + __kernel_fsid_t fsid; + void *fskey; /* 64 or 32 bits of fsid used for salt */ + }; struct fanotify_fh object_fh; /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; @@ -168,7 +179,10 @@ FANOTIFY_FE(struct fanotify_event *event) struct fanotify_name_event { struct fanotify_event fae; - __kernel_fsid_t fsid; + union { + __kernel_fsid_t fsid; + void *fskey; /* 64 or 32 bits of fsid used for salt */ + }; struct fanotify_info info; };
Improve the balance of hashed queue lists by mixing more event info relevant for merge. For example, all FAN_CREATE name events in the same dir used to have the same merge key based on the dir inode. With this change the created file name is mixed into the merge key. Signed-off-by: Amir Goldstein <amir73il@gmail.com> --- fs/notify/fanotify/fanotify.c | 33 +++++++++++++++++++++++++++------ fs/notify/fanotify/fanotify.h | 20 +++++++++++++++++--- 2 files changed, 44 insertions(+), 9 deletions(-)