
[for-next,04/12] io_uring: reschedule retargeting at shutdown of ring

Message ID 20221031134126.82928-5-dylany@meta.com (mailing list archive)
State New
Series io_uring: retarget rsrc nodes periodically

Commit Message

Dylan Yudaken Oct. 31, 2022, 1:41 p.m. UTC
When the ring shuts down, instead of waiting for the delayed work to release its
reference, reschedule it to run immediately and get the reference back that way.

Signed-off-by: Dylan Yudaken <dylany@meta.com>
---
 io_uring/io_uring.c |  1 +
 io_uring/rsrc.c     | 26 +++++++++++++++++++++-----
 io_uring/rsrc.h     |  1 +
 3 files changed, 23 insertions(+), 5 deletions(-)

Comments

Jens Axboe Oct. 31, 2022, 4:02 p.m. UTC | #1
On 10/31/22 7:41 AM, Dylan Yudaken wrote:
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 8d0d40713a63..40b37899e943 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -248,12 +248,20 @@ static unsigned int io_rsrc_retarget_table(struct io_ring_ctx *ctx,
>  	return refs;
>  }
>  
> -static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
> +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx, bool delay)
>  	__must_hold(&ctx->uring_lock)
>  {
> -	percpu_ref_get(&ctx->refs);
> -	mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 * HZ);
> -	ctx->rsrc_retarget_scheduled = true;
> +	unsigned long del;
> +
> +	if (delay)
> +		del = 60 * HZ;
> +	else
> +		del = 0;
> +
> +	if (likely(!mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, del))) {
> +		percpu_ref_get(&ctx->refs);
> +		ctx->rsrc_retarget_scheduled = true;
> +	}
>  }

What happens for del == 0 and the work running ala:

CPU 0				CPU 1
mod_delayed_work(.., 0);
				delayed_work runs
					put ctx
percpu_ref_get(ctx)

Also I think that likely() needs to get dropped.
Dylan Yudaken Oct. 31, 2022, 4:44 p.m. UTC | #2
On Mon, 2022-10-31 at 10:02 -0600, Jens Axboe wrote:
> On 10/31/22 7:41 AM, Dylan Yudaken wrote:
> > diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> > index 8d0d40713a63..40b37899e943 100644
> > --- a/io_uring/rsrc.c
> > +++ b/io_uring/rsrc.c
> > @@ -248,12 +248,20 @@ static unsigned int
> > io_rsrc_retarget_table(struct io_ring_ctx *ctx,
> >         return refs;
> >  }
> >  
> > -static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
> > +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx,
> > bool delay)
> >         __must_hold(&ctx->uring_lock)
> >  {
> > -       percpu_ref_get(&ctx->refs);
> > -       mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 *
> > HZ);
> > -       ctx->rsrc_retarget_scheduled = true;
> > +       unsigned long del;
> > +
> > +       if (delay)
> > +               del = 60 * HZ;
> > +       else
> > +               del = 0;
> > +
> > +       if (likely(!mod_delayed_work(system_wq, &ctx-
> > >rsrc_retarget_work, del))) {
> > +               percpu_ref_get(&ctx->refs);
> > +               ctx->rsrc_retarget_scheduled = true;
> > +       }
> >  }
> 
> What happens for del == 0 and the work running ala:
> 
> CPU 0                           CPU 1
> mod_delayed_work(.., 0);
>                                 delayed_work runs
>                                         put ctx
> percpu_ref_get(ctx)

The work takes the lock before put(ctx), and CPU 0 only releases the
lock after calling get(ctx), so it should be ok.
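
The ordering being relied on here is roughly the following (a sketch, assuming the
work handler takes uring_lock around the retarget and only drops its ctx reference
after unlocking; as the follow-up below points out, it also presumes the ctx
reference count has not already hit zero):

	/* CPU 0: io_rsrc_retarget_schedule(), called with uring_lock held */
	mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 0);
	percpu_ref_get(&ctx->refs);	/* ref taken while still holding the lock */
	/* ... */
	mutex_unlock(&ctx->uring_lock);	/* done later, by the caller */

	/* CPU 1: io_rsrc_retarget_work() */
	mutex_lock(&ctx->uring_lock);	/* blocks until CPU 0 unlocks, i.e. after the get */
	/* ... retarget rsrc nodes ... */
	mutex_unlock(&ctx->uring_lock);
	percpu_ref_put(&ctx->refs);	/* so the put cannot precede CPU 0's get */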

> 
> Also I think that likely() needs to get dropped.
> 

It's not a big thing, but the only time the work will already be enqueued
is at ring shutdown when there is an outstanding enqueue. Other times it
will not get double enqueued, as it is protected by the _scheduled bool
(this is important, or else each call would push the work back by one
period and it might never run).
Jens Axboe Oct. 31, 2022, 7:13 p.m. UTC | #3
On 10/31/22 10:44 AM, Dylan Yudaken wrote:
> On Mon, 2022-10-31 at 10:02 -0600, Jens Axboe wrote:
>> On 10/31/22 7:41 AM, Dylan Yudaken wrote:
>>> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
>>> index 8d0d40713a63..40b37899e943 100644
>>> --- a/io_uring/rsrc.c
>>> +++ b/io_uring/rsrc.c
>>> @@ -248,12 +248,20 @@ static unsigned int
>>> io_rsrc_retarget_table(struct io_ring_ctx *ctx,
>>>         return refs;
>>>  }
>>>  
>>> -static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
>>> +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx,
>>> bool delay)
>>>         __must_hold(&ctx->uring_lock)
>>>  {
>>> -       percpu_ref_get(&ctx->refs);
>>> -       mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 *
>>> HZ);
>>> -       ctx->rsrc_retarget_scheduled = true;
>>> +       unsigned long del;
>>> +
>>> +       if (delay)
>>> +               del = 60 * HZ;
>>> +       else
>>> +               del = 0;
>>> +
>>> +       if (likely(!mod_delayed_work(system_wq, &ctx-
>>>> rsrc_retarget_work, del))) {
>>> +               percpu_ref_get(&ctx->refs);
>>> +               ctx->rsrc_retarget_scheduled = true;
>>> +       }
>>>  }
>>
>> What happens for del == 0 and the work running ala:
>>
>> CPU 0                           CPU 1
>> mod_delayed_work(.., 0);
>>                                 delayed_work runs
>>                                         put ctx
>> percpu_ref_get(ctx)
> 
> The work takes the lock before put(ctx), and CPU 0 only releases the
> lock after calling get(ctx) so it should be ok.

But io_ring_ctx_ref_free() would've run at that point? Maybe I'm
missing something...

In any case, would be saner to always grab that ref first. Or at
least have a proper comment as to why it's safe, because it looks
iffy.
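
One way the "grab that ref first" variant could look is sketched below (illustrative
only, not code from this series; note the later reply points out that taking a
reference from the exit path is not valid anyway, since the refcount may already
have hit zero):

static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx, bool delay)
	__must_hold(&ctx->uring_lock)
{
	unsigned long del = delay ? 60 * HZ : 0;

	/* take the reference before the work could possibly run and put it */
	percpu_ref_get(&ctx->refs);
	if (mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, del)) {
		/* work was already pending and owns a reference; drop the extra one */
		percpu_ref_put(&ctx->refs);
	}
	ctx->rsrc_retarget_scheduled = true;
}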

>> Also I think that likely() needs to get dropped.
>>
> 
> It's not a big thing, but the only time it will be enqueued is on ring
> shutdown if there is an outstanding enqueue. Other times it will not
> get double enqueued as it is protected by the _scheduled bool (this is
> important or else it will continually push back by 1 period and maybe
> never run)

We've already called into this function, so I don't think it's worth a
likely(). Same for most of the others added in this series; imho they
only really make sense for a very hot path where that branch is inline.
Dylan Yudaken Nov. 1, 2022, 12:09 p.m. UTC | #4
On Mon, 2022-10-31 at 13:13 -0600, Jens Axboe wrote:
> On 10/31/22 10:44 AM, Dylan Yudaken wrote:
> > On Mon, 2022-10-31 at 10:02 -0600, Jens Axboe wrote:
> > > On 10/31/22 7:41 AM, Dylan Yudaken wrote:
> > > > diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> > > > index 8d0d40713a63..40b37899e943 100644
> > > > --- a/io_uring/rsrc.c
> > > > +++ b/io_uring/rsrc.c
> > > > @@ -248,12 +248,20 @@ static unsigned int
> > > > io_rsrc_retarget_table(struct io_ring_ctx *ctx,
> > > >         return refs;
> > > >  }
> > > >  
> > > > -static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
> > > > +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx,
> > > > bool delay)
> > > >         __must_hold(&ctx->uring_lock)
> > > >  {
> > > > -       percpu_ref_get(&ctx->refs);
> > > > -       mod_delayed_work(system_wq, &ctx->rsrc_retarget_work,
> > > > 60 *
> > > > HZ);
> > > > -       ctx->rsrc_retarget_scheduled = true;
> > > > +       unsigned long del;
> > > > +
> > > > +       if (delay)
> > > > +               del = 60 * HZ;
> > > > +       else
> > > > +               del = 0;
> > > > +
> > > > +       if (likely(!mod_delayed_work(system_wq, &ctx-
> > > > > rsrc_retarget_work, del))) {
> > > > +               percpu_ref_get(&ctx->refs);
> > > > +               ctx->rsrc_retarget_scheduled = true;
> > > > +       }
> > > >  }
> > > 
> > > What happens for del == 0 and the work running ala:
> > > 
> > > CPU 0                           CPU 1
> > > mod_delayed_work(.., 0);
> > >                                 delayed_work runs
> > >                                         put ctx
> > > percpu_ref_get(ctx)
> > 
> > The work takes the lock before put(ctx), and CPU 0 only releases
> > the
> > lock after calling get(ctx) so it should be ok.
> 
> But io_ring_ctx_ref_free() would've run at that point? Maybe I'm
> missing something...
> 
> In any case, would be saner to always grab that ref first. Or at
> least have a proper comment as to why it's safe, because it looks
> iffy.

I think I misunderstood - I was assuming a ref was already taken higher up
the stack. That is not the case, and in fact in the _exiting() calls it is
not really valid to take the reference, as it may have already hit zero.
Instead we can use cancel_delayed_work in exiting (no need to retarget
rsrc nodes at this point), which makes things a bit cleaner.
I'll update in v2.
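
A rough sketch of what that could look like (hypothetical, just illustrating the
idea described above, not the actual v2 patch):

void io_rsrc_retarget_exiting(struct io_ring_ctx *ctx)
{
	/*
	 * The ring is going away, so there is no point retargeting rsrc
	 * nodes any more. If a retarget was queued, cancel it and release
	 * the ctx reference the queued work was holding.
	 */
	if (cancel_delayed_work(&ctx->rsrc_retarget_work))
		percpu_ref_put(&ctx->refs);
}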

> 
> > > Also I think that likely() needs to get dropped.
> > > 
> > 
> > It's not a big thing, but the only time it will be enqueued is on
> > ring
> > shutdown if there is an outstanding enqueue. Other times it will
> > not
> > get double enqueued as it is protected by the _scheduled bool (this
> > is
> > important or else it will continually push back by 1 period and
> > maybe
> > never run)
> 
> We've already called into this function, don't think it's worth a
> likely. Same for most of the others added in this series, imho they
> only really make sense for a very hot path where that branch is
> inline.

Will remove it

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ea2260359c56..32eb305c4ce7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2751,6 +2751,7 @@  static __cold void io_ring_exit_work(struct work_struct *work)
 		}
 
 		io_req_caches_free(ctx);
+		io_rsrc_retarget_exiting(ctx);
 
 		if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
 			/* there is little hope left, don't run it too often */
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 8d0d40713a63..40b37899e943 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -248,12 +248,20 @@  static unsigned int io_rsrc_retarget_table(struct io_ring_ctx *ctx,
 	return refs;
 }
 
-static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
+static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx, bool delay)
 	__must_hold(&ctx->uring_lock)
 {
-	percpu_ref_get(&ctx->refs);
-	mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 * HZ);
-	ctx->rsrc_retarget_scheduled = true;
+	unsigned long del;
+
+	if (delay)
+		del = 60 * HZ;
+	else
+		del = 0;
+
+	if (likely(!mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, del))) {
+		percpu_ref_get(&ctx->refs);
+		ctx->rsrc_retarget_scheduled = true;
+	}
 }
 
 static void io_retarget_rsrc_wq_cb(struct io_wq_work *work, void *data)
@@ -332,6 +340,14 @@  void io_rsrc_retarget_work(struct work_struct *work)
 	percpu_ref_put(&ctx->refs);
 }
 
+void io_rsrc_retarget_exiting(struct io_ring_ctx *ctx)
+{
+	mutex_lock(&ctx->uring_lock);
+	if (ctx->rsrc_retarget_scheduled)
+		io_rsrc_retarget_schedule(ctx, false);
+	mutex_unlock(&ctx->uring_lock);
+}
+
 void io_wait_rsrc_data(struct io_rsrc_data *data)
 {
 	if (data && !atomic_dec_and_test(&data->refs))
@@ -414,7 +430,7 @@  void io_rsrc_node_switch(struct io_ring_ctx *ctx,
 		percpu_ref_kill(&rsrc_node->refs);
 		ctx->rsrc_node = NULL;
 		if (!ctx->rsrc_retarget_scheduled)
-			io_rsrc_retarget_schedule(ctx);
+			io_rsrc_retarget_schedule(ctx, true);
 	}
 
 	if (!ctx->rsrc_node) {
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 2b94df8fd9e8..93c66475796e 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -55,6 +55,7 @@  struct io_mapped_ubuf {
 
 void io_rsrc_put_work(struct work_struct *work);
 void io_rsrc_retarget_work(struct work_struct *work);
+void io_rsrc_retarget_exiting(struct io_ring_ctx *ctx);
 void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
 void io_wait_rsrc_data(struct io_rsrc_data *data);
 void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);