diff mbox series

[3/6] libmultipath: io_err_stat: don't free aio memory before completion

Message ID 20231024164937.14684-4-mwilck@suse.com (mailing list archive)
State Not Applicable, archived
Delegated to: christophe varoqui
Headers show
Series multipath: aio and systemd service improvements | expand

Commit Message

Martin Wilck Oct. 24, 2023, 4:49 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

It is wrong to assume that aio data structures can be reused or freed
after io_cancel(). io_cancel() will almost always return -EINPROGRESS,
anyway. Use the io_starttime field to indicate whether an io event
has been completed by the kernel. Make sure no in-flight buffers are freed.

Fixes https://github.com/opensvc/multipath-tools/issues/73.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Cc: Li Xiao Keng <lixiaokeng@huawei.com>
Cc: Miao Guanqin <miaoguanqin@huawei.com>
---
 libmultipath/io_err_stat.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

Comments

Benjamin Marzinski Oct. 25, 2023, 11:33 p.m. UTC | #1
On Tue, Oct 24, 2023 at 06:49:34PM +0200, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> It is wrong to assume that aio data structures can be reused or freed
> after io_cancel(). io_cancel() will almost always return -EINPROGRESS,
> anyway. Use the io_starttime field to indicate whether an io event
> has been completed by the kernel. Make sure no in-flight buffers are freed.

There's a minor issue I mention below, but the bigger issue I noticed
isn't related to your fix.  It's that, unless I'm missing something,
we're not using libaio correctly in this code.  We call io_setup() to
allow CONCUR_NR_EVENT concurrent requests. But we could have up to
CONCUR_NR_EVENT * number_of_delayed_paths concurrent requests. The
easiest fix is to create an ioctx per delayed path. However this would
mean calling io_destroy() after each round of testings, which could
potentially stall this thread.
 
> Fixes https://github.com/opensvc/multipath-tools/issues/73.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> Cc: Li Xiao Keng <lixiaokeng@huawei.com>
> Cc: Miao Guanqin <miaoguanqin@huawei.com>
> ---
>  libmultipath/io_err_stat.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index dc1c252..c474c34 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -111,10 +111,14 @@ static int init_each_dio_ctx(struct dio_ctx *ct, int blksize,
>  	return 0;
>  }
>  
> -static void deinit_each_dio_ctx(struct dio_ctx *ct)
> +static int deinit_each_dio_ctx(struct dio_ctx *ct)
>  {
> -	if (ct->buf)
> -		free(ct->buf);
> +	if (!ct->buf)
> +		return 0;
> +	if (ct->io_starttime.tv_sec != 0 || ct->io_starttime.tv_nsec != 0)
> +		return 1;
> +	free(ct->buf);
> +	return 0;
>  }
>  
>  static int setup_directio_ctx(struct io_err_stat_path *p)
> @@ -164,6 +168,7 @@ fail_close:
>  static void free_io_err_stat_path(struct io_err_stat_path *p)
>  {
>  	int i;
> +	int inflight = 0;
>  
>  	if (!p)
>  		return;
> @@ -173,8 +178,13 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
>  	cancel_inflight_io(p);

There's not much point in calling cancel_inflight_io() here, since since
we don't try to handle the done events afterwards, is there? If there
are unhandled ones, they will remain unhandled, and we will end up
leaking memory regardless of whether or not they're cancelled.

-Ben

>  	for (i = 0; i < CONCUR_NR_EVENT; i++)
> -		deinit_each_dio_ctx(p->dio_ctx_array + i);
> -	free(p->dio_ctx_array);
> +		inflight += deinit_each_dio_ctx(p->dio_ctx_array + i);
> +
> +	if (!inflight)
> +		free(p->dio_ctx_array);
> +	else
> +		io_err_stat_log(2, "%s: can't free aio space of %s, %d IOs in flight",
> +				__func__, p->devname, inflight);
>  
>  	if (p->fd > 0)
>  		close(p->fd);
> @@ -503,7 +513,7 @@ static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
>  	int		rc = PATH_UNCHECKED;
>  	int		r;
>  
> -	if (ct->io_starttime.tv_sec == 0)
> +	if (ct->io_starttime.tv_sec == 0 && ct->io_starttime.tv_nsec == 0)
>  		return rc;
>  	timespecsub(t, &ct->io_starttime, &difftime);
>  	if (difftime.tv_sec > IOTIMEOUT_SEC) {
> @@ -514,8 +524,6 @@ static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
>  		if (r)
>  			io_err_stat_log(5, "%s: io_cancel error %i",
>  					dev, errno);
> -		ct->io_starttime.tv_sec = 0;
> -		ct->io_starttime.tv_nsec = 0;
>  		rc = PATH_TIMEOUT;
>  	} else {
>  		rc = PATH_PENDING;
> @@ -559,8 +567,6 @@ static void cancel_inflight_io(struct io_err_stat_path *pp)
>  		if (r)
>  			io_err_stat_log(5, "%s: io_cancel error %d, %i",
>  					pp->devname, r, errno);
> -		ct->io_starttime.tv_sec = 0;
> -		ct->io_starttime.tv_nsec = 0;
>  	}
>  }
>  
> -- 
> 2.42.0
Martin Wilck Oct. 26, 2023, 9:12 a.m. UTC | #2
On Wed, 2023-10-25 at 19:33 -0400, Benjamin Marzinski wrote:
> On Tue, Oct 24, 2023 at 06:49:34PM +0200, mwilck@suse.com wrote:
> > From: Martin Wilck <mwilck@suse.com>
> > 
> > It is wrong to assume that aio data structures can be reused or
> > freed
> > after io_cancel(). io_cancel() will almost always return -
> > EINPROGRESS,
> > anyway. Use the io_starttime field to indicate whether an io event
> > has been completed by the kernel. Make sure no in-flight buffers
> > are freed.
> 
> There's a minor issue I mention below, but the bigger issue I noticed
> isn't related to your fix.  It's that, unless I'm missing something,
> we're not using libaio correctly in this code.  We call io_setup() to
> allow CONCUR_NR_EVENT concurrent requests. But we could have up to
> CONCUR_NR_EVENT * number_of_delayed_paths concurrent requests. The
> easiest fix is to create an ioctx per delayed path. However this
> would
> mean calling io_destroy() after each round of testings, which could
> potentially stall this thread.

Right. I suppose what's going to happen is that io_submit() will simply
fail with -EAGAIN. Note that the code logs io_submit() errors only at
log level 5. I guess if all goes well, the aio requests will terminate
quickly, and the algorithm will still "work", sort of. But if timeouts
happen (which is what this code was written for), the iocbs will be
quickly exhausted, possibly with IOs from just a single path, and
other paths won't be checked at all.

Btw, it's kind of odd to send 32 identical requests (all for block 0)
to the same device basically at the same time. I would think that with
very high probability, all IOs will either succeed or fail. The IO has
no FUA bit set, so even if it's direct IO, I assume that the device
will answer all but the first request from cache. I understand that
this is supposed to detect marginal paths, but it would make more sense
to send these IOs in random intervals than all at once. But maybe I
misunderstand something.

I've added Guan Junxiong, the original author, to the recipients list
of this email. As a first remedy against the iocb starvation, I suggest
to simply allocate a significantly larger ioctx, and increase the
loglevel of the message indicating io_submit() errors. I'll add a patch
to this set.

>  
> > Fixes https://github.com/opensvc/multipath-tools/issues/73.
> > 
> > Signed-off-by: Martin Wilck <mwilck@suse.com>
> > Cc: Li Xiao Keng <lixiaokeng@huawei.com>
> > Cc: Miao Guanqin <miaoguanqin@huawei.com>
> > ---
> >  libmultipath/io_err_stat.c | 26 ++++++++++++++++----------
> >  1 file changed, 16 insertions(+), 10 deletions(-)
> > 
> > diff --git a/libmultipath/io_err_stat.c
> > b/libmultipath/io_err_stat.c
> > index dc1c252..c474c34 100644
> > --- a/libmultipath/io_err_stat.c
> > +++ b/libmultipath/io_err_stat.c
> > @@ -111,10 +111,14 @@ static int init_each_dio_ctx(struct dio_ctx
> > *ct, int blksize,
> >  	return 0;
> >  }
> >  
> > -static void deinit_each_dio_ctx(struct dio_ctx *ct)
> > +static int deinit_each_dio_ctx(struct dio_ctx *ct)
> >  {
> > -	if (ct->buf)
> > -		free(ct->buf);
> > +	if (!ct->buf)
> > +		return 0;
> > +	if (ct->io_starttime.tv_sec != 0 || ct-
> > >io_starttime.tv_nsec != 0)
> > +		return 1;
> > +	free(ct->buf);
> > +	return 0;
> >  }
> >  
> >  static int setup_directio_ctx(struct io_err_stat_path *p)
> > @@ -164,6 +168,7 @@ fail_close:
> >  static void free_io_err_stat_path(struct io_err_stat_path *p)
> >  {
> >  	int i;
> > +	int inflight = 0;
> >  
> >  	if (!p)
> >  		return;
> > @@ -173,8 +178,13 @@ static void free_io_err_stat_path(struct
> > io_err_stat_path *p)
> >  	cancel_inflight_io(p);
> 
> There's not much point in calling cancel_inflight_io() here, since
> since
> we don't try to handle the done events afterwards, is there? If there
> are unhandled ones, they will remain unhandled, and we will end up
> leaking memory regardless of whether or not they're cancelled.

You're right. I'll have a closer look.

Martin
diff mbox series

Patch

diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
index dc1c252..c474c34 100644
--- a/libmultipath/io_err_stat.c
+++ b/libmultipath/io_err_stat.c
@@ -111,10 +111,14 @@  static int init_each_dio_ctx(struct dio_ctx *ct, int blksize,
 	return 0;
 }
 
-static void deinit_each_dio_ctx(struct dio_ctx *ct)
+static int deinit_each_dio_ctx(struct dio_ctx *ct)
 {
-	if (ct->buf)
-		free(ct->buf);
+	if (!ct->buf)
+		return 0;
+	if (ct->io_starttime.tv_sec != 0 || ct->io_starttime.tv_nsec != 0)
+		return 1;
+	free(ct->buf);
+	return 0;
 }
 
 static int setup_directio_ctx(struct io_err_stat_path *p)
@@ -164,6 +168,7 @@  fail_close:
 static void free_io_err_stat_path(struct io_err_stat_path *p)
 {
 	int i;
+	int inflight = 0;
 
 	if (!p)
 		return;
@@ -173,8 +178,13 @@  static void free_io_err_stat_path(struct io_err_stat_path *p)
 	cancel_inflight_io(p);
 
 	for (i = 0; i < CONCUR_NR_EVENT; i++)
-		deinit_each_dio_ctx(p->dio_ctx_array + i);
-	free(p->dio_ctx_array);
+		inflight += deinit_each_dio_ctx(p->dio_ctx_array + i);
+
+	if (!inflight)
+		free(p->dio_ctx_array);
+	else
+		io_err_stat_log(2, "%s: can't free aio space of %s, %d IOs in flight",
+				__func__, p->devname, inflight);
 
 	if (p->fd > 0)
 		close(p->fd);
@@ -503,7 +513,7 @@  static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
 	int		rc = PATH_UNCHECKED;
 	int		r;
 
-	if (ct->io_starttime.tv_sec == 0)
+	if (ct->io_starttime.tv_sec == 0 && ct->io_starttime.tv_nsec == 0)
 		return rc;
 	timespecsub(t, &ct->io_starttime, &difftime);
 	if (difftime.tv_sec > IOTIMEOUT_SEC) {
@@ -514,8 +524,6 @@  static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
 		if (r)
 			io_err_stat_log(5, "%s: io_cancel error %i",
 					dev, errno);
-		ct->io_starttime.tv_sec = 0;
-		ct->io_starttime.tv_nsec = 0;
 		rc = PATH_TIMEOUT;
 	} else {
 		rc = PATH_PENDING;
@@ -559,8 +567,6 @@  static void cancel_inflight_io(struct io_err_stat_path *pp)
 		if (r)
 			io_err_stat_log(5, "%s: io_cancel error %d, %i",
 					pp->devname, r, errno);
-		ct->io_starttime.tv_sec = 0;
-		ct->io_starttime.tv_nsec = 0;
 	}
 }