diff mbox series

[v8,03/10] eventfd: Increase the recursion depth of eventfd_signal()

Message ID 20210615141331.407-4-xieyongji@bytedance.com (mailing list archive)
State New
Headers show
Series Introduce VDUSE - vDPA Device in Userspace | expand

Commit Message

Xie Yongji June 15, 2021, 2:13 p.m. UTC
Increase the recursion depth of eventfd_signal() to 1. This
is the maximum recursion depth we have found so far, which
can be triggered with the following call chain:

    kvm_io_bus_write                        [kvm]
      --> ioeventfd_write                   [kvm]
        --> eventfd_signal                  [eventfd]
          --> vhost_poll_wakeup             [vhost]
            --> vduse_vdpa_kick_vq          [vduse]
              --> eventfd_signal            [eventfd]

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 fs/eventfd.c            | 2 +-
 include/linux/eventfd.h | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

Comments

He Zhe June 17, 2021, 8:33 a.m. UTC | #1
On 6/15/21 10:13 PM, Xie Yongji wrote:
> Increase the recursion depth of eventfd_signal() to 1. This
> is the maximum recursion depth we have found so far, which
> can be triggered with the following call chain:
>
>     kvm_io_bus_write                        [kvm]
>       --> ioeventfd_write                   [kvm]
>         --> eventfd_signal                  [eventfd]
>           --> vhost_poll_wakeup             [vhost]
>             --> vduse_vdpa_kick_vq          [vduse]
>               --> eventfd_signal            [eventfd]
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> Acked-by: Jason Wang <jasowang@redhat.com>

The fix had been posted one year ago.

https://lore.kernel.org/lkml/20200410114720.24838-1-zhe.he@windriver.com/


> ---
>  fs/eventfd.c            | 2 +-
>  include/linux/eventfd.h | 5 ++++-
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/fs/eventfd.c b/fs/eventfd.c
> index e265b6dd4f34..cc7cd1dbedd3 100644
> --- a/fs/eventfd.c
> +++ b/fs/eventfd.c
> @@ -71,7 +71,7 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
>  	 * it returns true, the eventfd_signal() call should be deferred to a
>  	 * safe context.
>  	 */
> -	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
> +	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH))
>  		return 0;
>  
>  	spin_lock_irqsave(&ctx->wqh.lock, flags);
> diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
> index fa0a524baed0..886d99cd38ef 100644
> --- a/include/linux/eventfd.h
> +++ b/include/linux/eventfd.h
> @@ -29,6 +29,9 @@
>  #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
>  #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
>  
> +/* Maximum recursion depth */
> +#define EFD_WAKE_DEPTH 1
> +
>  struct eventfd_ctx;
>  struct file;
>  
> @@ -47,7 +50,7 @@ DECLARE_PER_CPU(int, eventfd_wake_count);
>  
>  static inline bool eventfd_signal_count(void)
>  {
> -	return this_cpu_read(eventfd_wake_count);
> +	return this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH;

count is just count. How deep is acceptable should be put
where eventfd_signal_count is called.


Zhe

>  }
>  
>  #else /* CONFIG_EVENTFD */
Xie Yongji June 18, 2021, 3:29 a.m. UTC | #2
On Thu, Jun 17, 2021 at 4:34 PM He Zhe <zhe.he@windriver.com> wrote:
>
>
>
> On 6/15/21 10:13 PM, Xie Yongji wrote:
> > Increase the recursion depth of eventfd_signal() to 1. This
> > is the maximum recursion depth we have found so far, which
> > can be triggered with the following call chain:
> >
> >     kvm_io_bus_write                        [kvm]
> >       --> ioeventfd_write                   [kvm]
> >         --> eventfd_signal                  [eventfd]
> >           --> vhost_poll_wakeup             [vhost]
> >             --> vduse_vdpa_kick_vq          [vduse]
> >               --> eventfd_signal            [eventfd]
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > Acked-by: Jason Wang <jasowang@redhat.com>
>
> The fix had been posted one year ago.
>
> https://lore.kernel.org/lkml/20200410114720.24838-1-zhe.he@windriver.com/
>

OK, so it seems to be a fix for the RT system if my understanding is
correct? Any reason why it's not merged? I'm happy to rebase my series
on your patch if you'd like to repost it.

BTW, I also notice another thread for this issue:

https://lore.kernel.org/linux-fsdevel/DM6PR11MB420291B550A10853403C7592FF349@DM6PR11MB4202.namprd11.prod.outlook.com/T/

>
> > ---
> >  fs/eventfd.c            | 2 +-
> >  include/linux/eventfd.h | 5 ++++-
> >  2 files changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/eventfd.c b/fs/eventfd.c
> > index e265b6dd4f34..cc7cd1dbedd3 100644
> > --- a/fs/eventfd.c
> > +++ b/fs/eventfd.c
> > @@ -71,7 +71,7 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
> >        * it returns true, the eventfd_signal() call should be deferred to a
> >        * safe context.
> >        */
> > -     if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
> > +     if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH))
> >               return 0;
> >
> >       spin_lock_irqsave(&ctx->wqh.lock, flags);
> > diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
> > index fa0a524baed0..886d99cd38ef 100644
> > --- a/include/linux/eventfd.h
> > +++ b/include/linux/eventfd.h
> > @@ -29,6 +29,9 @@
> >  #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
> >  #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
> >
> > +/* Maximum recursion depth */
> > +#define EFD_WAKE_DEPTH 1
> > +
> >  struct eventfd_ctx;
> >  struct file;
> >
> > @@ -47,7 +50,7 @@ DECLARE_PER_CPU(int, eventfd_wake_count);
> >
> >  static inline bool eventfd_signal_count(void)
> >  {
> > -     return this_cpu_read(eventfd_wake_count);
> > +     return this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH;
>
> count is just count. How deep is acceptable should be put
> where eventfd_signal_count is called.
>

The return value of this function is boolean rather than integer.
Please see the comments in eventfd_signal():

"then it should check eventfd_signal_count() before calling this
function. If it returns true, the eventfd_signal() call should be
deferred to a safe context."

Thanks,
Yongji
He Zhe June 18, 2021, 8:41 a.m. UTC | #3
On 6/18/21 11:29 AM, Yongji Xie wrote:
> On Thu, Jun 17, 2021 at 4:34 PM He Zhe <zhe.he@windriver.com> wrote:
>>
>>
>> On 6/15/21 10:13 PM, Xie Yongji wrote:
>>> Increase the recursion depth of eventfd_signal() to 1. This
>>> is the maximum recursion depth we have found so far, which
>>> can be triggered with the following call chain:
>>>
>>>     kvm_io_bus_write                        [kvm]
>>>       --> ioeventfd_write                   [kvm]
>>>         --> eventfd_signal                  [eventfd]
>>>           --> vhost_poll_wakeup             [vhost]
>>>             --> vduse_vdpa_kick_vq          [vduse]
>>>               --> eventfd_signal            [eventfd]
>>>
>>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
>>> Acked-by: Jason Wang <jasowang@redhat.com>
>> The fix had been posted one year ago.
>>
>> https://lore.kernel.org/lkml/20200410114720.24838-1-zhe.he@windriver.com/
>>
> OK, so it seems to be a fix for the RT system if my understanding is
> correct? Any reason why it's not merged? I'm happy to rebase my series
> on your patch if you'd like to repost it.

It works for both mainline and RT kernel. The folks just reproduced in their RT
environments.

This patch somehow hasn't got maintainer's reply, so not merged yet.

And OK, I'll resend the patch.

>
> BTW, I also notice another thread for this issue:
>
> https://lore.kernel.org/linux-fsdevel/DM6PR11MB420291B550A10853403C7592FF349@DM6PR11MB4202.namprd11.prod.outlook.com/T/

This is the same way as my v1

https://lore.kernel.org/lkml/3b4aa4cb-0e76-89c2-c48a-cf24e1a36bc2@kernel.dk/

which was not what the maintainer wanted.


>
>>> ---
>>>  fs/eventfd.c            | 2 +-
>>>  include/linux/eventfd.h | 5 ++++-
>>>  2 files changed, 5 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/eventfd.c b/fs/eventfd.c
>>> index e265b6dd4f34..cc7cd1dbedd3 100644
>>> --- a/fs/eventfd.c
>>> +++ b/fs/eventfd.c
>>> @@ -71,7 +71,7 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
>>>        * it returns true, the eventfd_signal() call should be deferred to a
>>>        * safe context.
>>>        */
>>> -     if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
>>> +     if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH))
>>>               return 0;
>>>
>>>       spin_lock_irqsave(&ctx->wqh.lock, flags);
>>> diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
>>> index fa0a524baed0..886d99cd38ef 100644
>>> --- a/include/linux/eventfd.h
>>> +++ b/include/linux/eventfd.h
>>> @@ -29,6 +29,9 @@
>>>  #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
>>>  #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
>>>
>>> +/* Maximum recursion depth */
>>> +#define EFD_WAKE_DEPTH 1
>>> +
>>>  struct eventfd_ctx;
>>>  struct file;
>>>
>>> @@ -47,7 +50,7 @@ DECLARE_PER_CPU(int, eventfd_wake_count);
>>>
>>>  static inline bool eventfd_signal_count(void)
>>>  {
>>> -     return this_cpu_read(eventfd_wake_count);
>>> +     return this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH;
>> count is just count. How deep is acceptable should be put
>> where eventfd_signal_count is called.
>>
> The return value of this function is boolean rather than integer.
> Please see the comments in eventfd_signal():
>
> "then it should check eventfd_signal_count() before calling this
> function. If it returns true, the eventfd_signal() call should be
> deferred to a safe context."

OK. Now that the maintainer comments as such we can use it accordingly,
though I still got the feeling that the function name and the type of the return
value don't match.


Thanks,
Zhe

>
> Thanks,
> Yongji
diff mbox series

Patch

diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..cc7cd1dbedd3 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -71,7 +71,7 @@  __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 	 * it returns true, the eventfd_signal() call should be deferred to a
 	 * safe context.
 	 */
-	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH))
 		return 0;
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524baed0..886d99cd38ef 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -29,6 +29,9 @@ 
 #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
 
+/* Maximum recursion depth */
+#define EFD_WAKE_DEPTH 1
+
 struct eventfd_ctx;
 struct file;
 
@@ -47,7 +50,7 @@  DECLARE_PER_CPU(int, eventfd_wake_count);
 
 static inline bool eventfd_signal_count(void)
 {
-	return this_cpu_read(eventfd_wake_count);
+	return this_cpu_read(eventfd_wake_count) > EFD_WAKE_DEPTH;
 }
 
 #else /* CONFIG_EVENTFD */