diff mbox series

[6/6] fanotify: Use interruptible wait when waiting for permission events

Message ID 20190213145443.26836-7-jack@suse.cz (mailing list archive)
State New, archived
Headers show
Series fanotify: Make wait for permission event response interruptible | expand

Commit Message

Jan Kara Feb. 13, 2019, 2:54 p.m. UTC
When waiting for response to fanotify permission events, we currently
use uninterruptible waits. That makes code simple however it can cause
lots of processes to end up in uninterruptible sleep with hard reboot
being the only alternative in case fanotify listener process stops
responding (e.g. due to a bug in its implementation). Uninterruptible
sleep also makes system hibernation fail if the listener gets frozen
before the process generating fanotify permission event.

Fix these problems by using interruptible sleep for waiting for response
to fanotify event. This is slightly tricky though - we have to
detect when the event got already reported to userspace as in that
case we must not free the event. Instead we push the responsibility for
freeing the event to the process that will write response to the
event.

Reported-by: Orion Poplawski <orion@nwra.com>
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c | 35 ++++++++++++++++++++++++++++++++---
 fs/notify/fanotify/fanotify.h |  3 ++-
 2 files changed, 34 insertions(+), 4 deletions(-)

Comments

Amir Goldstein Feb. 13, 2019, 9:02 p.m. UTC | #1
On Wed, Feb 13, 2019 at 4:54 PM Jan Kara <jack@suse.cz> wrote:
>
> When waiting for response to fanotify permission events, we currently
> use uninterruptible waits. That makes code simple however it can cause
> lots of processes to end up in uninterruptible sleep with hard reboot
> being the only alternative in case fanotify listener process stops
> responding (e.g. due to a bug in its implementation). Uninterruptible
> sleep also makes system hibernation fail if the listener gets frozen
> before the process generating fanotify permission event.
>
> Fix these problems by using interruptible sleep for waiting for response
> to fanotify event. This is slightly tricky though - we have to
> detect when the event got already reported to userspace as in that
> case we must not free the event. Instead we push the responsibility for
> freeing the event to the process that will write response to the
> event.
>
> Reported-by: Orion Poplawski <orion@nwra.com>
> Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/notify/fanotify/fanotify.c | 35 ++++++++++++++++++++++++++++++++---
>  fs/notify/fanotify/fanotify.h |  3 ++-
>  2 files changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index e725831be161..2e40d5d8660b 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -57,6 +57,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
>         return 0;
>  }
>
> +/*
> + * Wait for response to permission event. The function also takes care of
> + * freeing the permission event (or offloads that in case the wait is canceled
> + * by a signal). The function returns 0 in case access got allowed by userspace,
> + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
> + * the wait got interrupted by a signal.
> + */
>  static int fanotify_get_response(struct fsnotify_group *group,
>                                  struct fanotify_perm_event_info *event,
>                                  struct fsnotify_iter_info *iter_info)
> @@ -65,8 +72,29 @@ static int fanotify_get_response(struct fsnotify_group *group,
>
>         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
>
> -       wait_event(group->fanotify_data.access_waitq,
> -                  event->state == FAN_EVENT_ANSWERED);
> +       ret = wait_event_interruptible(group->fanotify_data.access_waitq,
> +                                      event->state == FAN_EVENT_ANSWERED);
> +       /* Signal pending? */
> +       if (ret < 0) {
> +               spin_lock(&group->notification_lock);
> +               /* Event reported to userspace and no answer yet? */
> +               if (event->state == FAN_EVENT_REPORTED) {
> +                       /* Event will get freed once userspace answers to it */

Did you forget to commit the
if (event->state == FAN_EVENT_CANCELED)
code?

> +                       event->state = FAN_EVENT_CANCELED;
> +                       spin_unlock(&group->notification_lock);
> +                       return ret;
> +               }
> +               /* Event not yet reported? Just remove it. */
> +               if (event->state == FAN_EVENT_INIT)
> +                       fsnotify_remove_queued_event(group, &event->fae.fse);
> +               /*
> +                * Event may be also answered in case signal delivery raced
> +                * with wakeup. In that case we have nothing to do besides
> +                * freeing the event and reporting error.
> +                */
> +               spin_unlock(&group->notification_lock);
> +               goto out;
> +       }
>
>         /* userspace responded, convert to something usable */
>         switch (event->response & ~FAN_AUDIT) {
> @@ -84,6 +112,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
>
>         pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
>                  group, event, ret);
> +out:
> +       fsnotify_destroy_event(group, &event->fae.fse);
>
>         return ret;
>  }
> @@ -255,7 +285,6 @@ static int fanotify_handle_event(struct fsnotify_group *group,
>         } else if (fanotify_is_perm_event(mask)) {
>                 ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
>                                             iter_info);
> -               fsnotify_destroy_event(group, fsn_event);
>         }
>  finish:
>         if (fanotify_is_perm_event(mask))
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index 98d58939777c..bbdd2adfbf0f 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -26,7 +26,8 @@ struct fanotify_event_info {
>  enum {
>         FAN_EVENT_INIT,
>         FAN_EVENT_REPORTED,
> -       FAN_EVENT_ANSWERED
> +       FAN_EVENT_ANSWERED,
> +       FAN_EVENT_CANCELED,
>  };
>
>  /*
> --
> 2.16.4
>
Jan Kara Feb. 14, 2019, 6:01 p.m. UTC | #2
On Wed 13-02-19 23:02:17, Amir Goldstein wrote:
> On Wed, Feb 13, 2019 at 4:54 PM Jan Kara <jack@suse.cz> wrote:
> >
> > When waiting for response to fanotify permission events, we currently
> > use uninterruptible waits. That makes code simple however it can cause
> > lots of processes to end up in uninterruptible sleep with hard reboot
> > being the only alternative in case fanotify listener process stops
> > responding (e.g. due to a bug in its implementation). Uninterruptible
> > sleep also makes system hibernation fail if the listener gets frozen
> > before the process generating fanotify permission event.
> >
> > Fix these problems by using interruptible sleep for waiting for response
> > to fanotify event. This is slightly tricky though - we have to
> > detect when the event got already reported to userspace as in that
> > case we must not free the event. Instead we push the responsibility for
> > freeing the event to the process that will write response to the
> > event.
> >
> > Reported-by: Orion Poplawski <orion@nwra.com>
> > Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/notify/fanotify/fanotify.c | 35 ++++++++++++++++++++++++++++++++---
> >  fs/notify/fanotify/fanotify.h |  3 ++-
> >  2 files changed, 34 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > index e725831be161..2e40d5d8660b 100644
> > --- a/fs/notify/fanotify/fanotify.c
> > +++ b/fs/notify/fanotify/fanotify.c
> > @@ -57,6 +57,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
> >         return 0;
> >  }
> >
> > +/*
> > + * Wait for response to permission event. The function also takes care of
> > + * freeing the permission event (or offloads that in case the wait is canceled
> > + * by a signal). The function returns 0 in case access got allowed by userspace,
> > + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
> > + * the wait got interrupted by a signal.
> > + */
> >  static int fanotify_get_response(struct fsnotify_group *group,
> >                                  struct fanotify_perm_event_info *event,
> >                                  struct fsnotify_iter_info *iter_info)
> > @@ -65,8 +72,29 @@ static int fanotify_get_response(struct fsnotify_group *group,
> >
> >         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
> >
> > -       wait_event(group->fanotify_data.access_waitq,
> > -                  event->state == FAN_EVENT_ANSWERED);
> > +       ret = wait_event_interruptible(group->fanotify_data.access_waitq,
> > +                                      event->state == FAN_EVENT_ANSWERED);
> > +       /* Signal pending? */
> > +       if (ret < 0) {
> > +               spin_lock(&group->notification_lock);
> > +               /* Event reported to userspace and no answer yet? */
> > +               if (event->state == FAN_EVENT_REPORTED) {
> > +                       /* Event will get freed once userspace answers to it */
> 
> Did you forget to commit the
> if (event->state == FAN_EVENT_CANCELED)
> code?

Yeah, somehow I forgot that bit. Thanks for noticing! Attached is a fixed
version of the patch.

								Honza
Amir Goldstein Feb. 14, 2019, 6:53 p.m. UTC | #3
On Thu, Feb 14, 2019 at 8:01 PM Jan Kara <jack@suse.cz> wrote:
>
> On Wed 13-02-19 23:02:17, Amir Goldstein wrote:
> > On Wed, Feb 13, 2019 at 4:54 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > When waiting for response to fanotify permission events, we currently
> > > use uninterruptible waits. That makes code simple however it can cause
> > > lots of processes to end up in uninterruptible sleep with hard reboot
> > > being the only alternative in case fanotify listener process stops
> > > responding (e.g. due to a bug in its implementation). Uninterruptible
> > > sleep also makes system hibernation fail if the listener gets frozen
> > > before the process generating fanotify permission event.
> > >
> > > Fix these problems by using interruptible sleep for waiting for response
> > > to fanotify event. This is slightly tricky though - we have to
> > > detect when the event got already reported to userspace as in that
> > > case we must not free the event. Instead we push the responsibility for
> > > freeing the event to the process that will write response to the
> > > event.
> > >
> > > Reported-by: Orion Poplawski <orion@nwra.com>
> > > Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> > > Signed-off-by: Jan Kara <jack@suse.cz>
> > > ---
> > >  fs/notify/fanotify/fanotify.c | 35 ++++++++++++++++++++++++++++++++---
> > >  fs/notify/fanotify/fanotify.h |  3 ++-
> > >  2 files changed, 34 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > > index e725831be161..2e40d5d8660b 100644
> > > --- a/fs/notify/fanotify/fanotify.c
> > > +++ b/fs/notify/fanotify/fanotify.c
> > > @@ -57,6 +57,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
> > >         return 0;
> > >  }
> > >
> > > +/*
> > > + * Wait for response to permission event. The function also takes care of
> > > + * freeing the permission event (or offloads that in case the wait is canceled
> > > + * by a signal). The function returns 0 in case access got allowed by userspace,
> > > + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
> > > + * the wait got interrupted by a signal.
> > > + */
> > >  static int fanotify_get_response(struct fsnotify_group *group,
> > >                                  struct fanotify_perm_event_info *event,
> > >                                  struct fsnotify_iter_info *iter_info)
> > > @@ -65,8 +72,29 @@ static int fanotify_get_response(struct fsnotify_group *group,
> > >
> > >         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
> > >
> > > -       wait_event(group->fanotify_data.access_waitq,
> > > -                  event->state == FAN_EVENT_ANSWERED);
> > > +       ret = wait_event_interruptible(group->fanotify_data.access_waitq,
> > > +                                      event->state == FAN_EVENT_ANSWERED);
> > > +       /* Signal pending? */
> > > +       if (ret < 0) {
> > > +               spin_lock(&group->notification_lock);
> > > +               /* Event reported to userspace and no answer yet? */
> > > +               if (event->state == FAN_EVENT_REPORTED) {
> > > +                       /* Event will get freed once userspace answers to it */
> >
> > Did you forget to commit the
> > if (event->state == FAN_EVENT_CANCELED)
> > code?
>
> Yeah, somehow I forgot that bit. Thanks for noticing! Attached is a fixed
> version of the patch.
>

Looks ok.

Feel free to add:
Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Thanks,
Amir.
Jan Kara Feb. 18, 2019, 11:04 a.m. UTC | #4
On Thu 14-02-19 20:53:38, Amir Goldstein wrote:
> On Thu, Feb 14, 2019 at 8:01 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Wed 13-02-19 23:02:17, Amir Goldstein wrote:
> > > On Wed, Feb 13, 2019 at 4:54 PM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > When waiting for response to fanotify permission events, we currently
> > > > use uninterruptible waits. That makes code simple however it can cause
> > > > lots of processes to end up in uninterruptible sleep with hard reboot
> > > > being the only alternative in case fanotify listener process stops
> > > > responding (e.g. due to a bug in its implementation). Uninterruptible
> > > > sleep also makes system hibernation fail if the listener gets frozen
> > > > before the process generating fanotify permission event.
> > > >
> > > > Fix these problems by using interruptible sleep for waiting for response
> > > > to fanotify event. This is slightly tricky though - we have to
> > > > detect when the event got already reported to userspace as in that
> > > > case we must not free the event. Instead we push the responsibility for
> > > > freeing the event to the process that will write response to the
> > > > event.
> > > >
> > > > Reported-by: Orion Poplawski <orion@nwra.com>
> > > > Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> > > > Signed-off-by: Jan Kara <jack@suse.cz>
> > > > ---
> > > >  fs/notify/fanotify/fanotify.c | 35 ++++++++++++++++++++++++++++++++---
> > > >  fs/notify/fanotify/fanotify.h |  3 ++-
> > > >  2 files changed, 34 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > > > index e725831be161..2e40d5d8660b 100644
> > > > --- a/fs/notify/fanotify/fanotify.c
> > > > +++ b/fs/notify/fanotify/fanotify.c
> > > > @@ -57,6 +57,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
> > > >         return 0;
> > > >  }
> > > >
> > > > +/*
> > > > + * Wait for response to permission event. The function also takes care of
> > > > + * freeing the permission event (or offloads that in case the wait is canceled
> > > > + * by a signal). The function returns 0 in case access got allowed by userspace,
> > > > + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
> > > > + * the wait got interrupted by a signal.
> > > > + */
> > > >  static int fanotify_get_response(struct fsnotify_group *group,
> > > >                                  struct fanotify_perm_event_info *event,
> > > >                                  struct fsnotify_iter_info *iter_info)
> > > > @@ -65,8 +72,29 @@ static int fanotify_get_response(struct fsnotify_group *group,
> > > >
> > > >         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
> > > >
> > > > -       wait_event(group->fanotify_data.access_waitq,
> > > > -                  event->state == FAN_EVENT_ANSWERED);
> > > > +       ret = wait_event_interruptible(group->fanotify_data.access_waitq,
> > > > +                                      event->state == FAN_EVENT_ANSWERED);
> > > > +       /* Signal pending? */
> > > > +       if (ret < 0) {
> > > > +               spin_lock(&group->notification_lock);
> > > > +               /* Event reported to userspace and no answer yet? */
> > > > +               if (event->state == FAN_EVENT_REPORTED) {
> > > > +                       /* Event will get freed once userspace answers to it */
> > >
> > > Did you forget to commit the
> > > if (event->state == FAN_EVENT_CANCELED)
> > > code?
> >
> > Yeah, somehow I forgot that bit. Thanks for noticing! Attached is a fixed
> > version of the patch.
> >
> 
> Looks ok.
> 
> Feel free to add:
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Thanks for review Amir! I'll queue patches to my tree then.

								Honza
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e725831be161..2e40d5d8660b 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -57,6 +57,13 @@  static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 	return 0;
 }
 
+/*
+ * Wait for response to permission event. The function also takes care of
+ * freeing the permission event (or offloads that in case the wait is canceled
+ * by a signal). The function returns 0 in case access got allowed by userspace,
+ * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
+ * the wait got interrupted by a signal.
+ */
 static int fanotify_get_response(struct fsnotify_group *group,
 				 struct fanotify_perm_event_info *event,
 				 struct fsnotify_iter_info *iter_info)
@@ -65,8 +72,29 @@  static int fanotify_get_response(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq,
-		   event->state == FAN_EVENT_ANSWERED);
+	ret = wait_event_interruptible(group->fanotify_data.access_waitq,
+				       event->state == FAN_EVENT_ANSWERED);
+	/* Signal pending? */
+	if (ret < 0) {
+		spin_lock(&group->notification_lock);
+		/* Event reported to userspace and no answer yet? */
+		if (event->state == FAN_EVENT_REPORTED) {
+			/* Event will get freed once userspace answers to it */
+			event->state = FAN_EVENT_CANCELED;
+			spin_unlock(&group->notification_lock);
+			return ret;
+		}
+		/* Event not yet reported? Just remove it. */
+		if (event->state == FAN_EVENT_INIT)
+			fsnotify_remove_queued_event(group, &event->fae.fse);
+		/*
+		 * Event may be also answered in case signal delivery raced
+		 * with wakeup. In that case we have nothing to do besides
+		 * freeing the event and reporting error.
+		 */
+		spin_unlock(&group->notification_lock);
+		goto out;
+	}
 
 	/* userspace responded, convert to something usable */
 	switch (event->response & ~FAN_AUDIT) {
@@ -84,6 +112,8 @@  static int fanotify_get_response(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
 		 group, event, ret);
+out:
+	fsnotify_destroy_event(group, &event->fae.fse);
 	
 	return ret;
 }
@@ -255,7 +285,6 @@  static int fanotify_handle_event(struct fsnotify_group *group,
 	} else if (fanotify_is_perm_event(mask)) {
 		ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
 					    iter_info);
-		fsnotify_destroy_event(group, fsn_event);
 	}
 finish:
 	if (fanotify_is_perm_event(mask))
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 98d58939777c..bbdd2adfbf0f 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -26,7 +26,8 @@  struct fanotify_event_info {
 enum {
 	FAN_EVENT_INIT,
 	FAN_EVENT_REPORTED,
-	FAN_EVENT_ANSWERED
+	FAN_EVENT_ANSWERED,
+	FAN_EVENT_CANCELED,
 };
 
 /*