diff mbox series

[v2,04/14] libmultipath: io_err_stat: don't free aio memory before completion

Message ID 20231026174153.1133-5-mwilck@suse.com (mailing list archive)
State Not Applicable, archived
Delegated to: christophe varoqui
Headers show
Series multipath: aio, systemd, and documentation improvements | expand

Commit Message

Martin Wilck Oct. 26, 2023, 5:41 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

It is wrong to assume that aio data structures can be reused or freed
after io_cancel(): io_cancel() will almost always return -EINPROGRESS
anyway. Use the io_starttime field to indicate whether an I/O event
has been completed by the kernel: it is no longer reset right after
io_cancel(), so a non-zero value means the I/O is treated as still in
flight. Make sure no in-flight buffers are freed.

Fixes https://github.com/opensvc/multipath-tools/issues/73.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Cc: Li Xiao Keng <lixiaokeng@huawei.com>
Cc: Miao Guanqin <miaoguanqin@huawei.com>
Cc: Guan Junxiong <guanjunxiong@huawei.com>
---
 libmultipath/io_err_stat.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

Comments

Benjamin Marzinski Oct. 27, 2023, 6:55 p.m. UTC | #1
On Thu, Oct 26, 2023 at 07:41:43PM +0200, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> It is wrong to assume that aio data structures can be reused or freed
> after io_cancel(). io_cancel() will almost always return -EINPROGRESS,
> anyway. Use the io_starttime field to indicate whether an io event
> has been completed by the kernel. Make sure no in-flight buffers are freed.
> 
> Fixes https://github.com/opensvc/multipath-tools/issues/73.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> Cc: Li Xiao Keng <lixiaokeng@huawei.com>
> Cc: Miao Guanqin <miaoguanqin@huawei.com>
> Cc: Guan Junxiong <guanjunxiong@huawei.com>
> ---
>  libmultipath/io_err_stat.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index dc1c252..c474c34 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -111,10 +111,14 @@ static int init_each_dio_ctx(struct dio_ctx *ct, int blksize,
>  	return 0;
>  }
>  
> -static void deinit_each_dio_ctx(struct dio_ctx *ct)
> +static int deinit_each_dio_ctx(struct dio_ctx *ct)
>  {
> -	if (ct->buf)
> -		free(ct->buf);
> +	if (!ct->buf)
> +		return 0;
> +	if (ct->io_starttime.tv_sec != 0 || ct->io_starttime.tv_nsec != 0)
> +		return 1;
> +	free(ct->buf);
> +	return 0;
>  }
>  
>  static int setup_directio_ctx(struct io_err_stat_path *p)
> @@ -164,6 +168,7 @@ fail_close:
>  static void free_io_err_stat_path(struct io_err_stat_path *p)
>  {
>  	int i;
> +	int inflight = 0;
>  
>  	if (!p)
>  		return;
> @@ -173,8 +178,13 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
>  	cancel_inflight_io(p);
>  
>  	for (i = 0; i < CONCUR_NR_EVENT; i++)
> -		deinit_each_dio_ctx(p->dio_ctx_array + i);
> -	free(p->dio_ctx_array);
> +		inflight += deinit_each_dio_ctx(p->dio_ctx_array + i);
> +
> +	if (!inflight)
> +		free(p->dio_ctx_array);
> +	else
> +		io_err_stat_log(2, "%s: can't free aio space of %s, %d IOs in flight",
> +				__func__, p->devname, inflight);
>  
>  	if (p->fd > 0)
>  		close(p->fd);
> @@ -503,7 +513,7 @@ static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
>  	int		rc = PATH_UNCHECKED;
>  	int		r;
>  
> -	if (ct->io_starttime.tv_sec == 0)
> +	if (ct->io_starttime.tv_sec == 0 && ct->io_starttime.tv_nsec == 0)
>  		return rc;
>  	timespecsub(t, &ct->io_starttime, &difftime);
>  	if (difftime.tv_sec > IOTIMEOUT_SEC) {
> @@ -514,8 +524,6 @@ static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
>  		if (r)
>  			io_err_stat_log(5, "%s: io_cancel error %i",
>  					dev, errno);
> -		ct->io_starttime.tv_sec = 0;
> -		ct->io_starttime.tv_nsec = 0;
>  		rc = PATH_TIMEOUT;
>  	} else {
>  		rc = PATH_PENDING;
> @@ -559,8 +567,6 @@ static void cancel_inflight_io(struct io_err_stat_path *pp)
>  		if (r)
>  			io_err_stat_log(5, "%s: io_cancel error %d, %i",
>  					pp->devname, r, errno);
> -		ct->io_starttime.tv_sec = 0;
> -		ct->io_starttime.tv_nsec = 0;
>  	}
>  }
>  
> -- 
> 2.42.0
diff mbox series

Patch

diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
index dc1c252..c474c34 100644
--- a/libmultipath/io_err_stat.c
+++ b/libmultipath/io_err_stat.c
@@ -111,10 +111,14 @@  static int init_each_dio_ctx(struct dio_ctx *ct, int blksize,
 	return 0;
 }
 
-static void deinit_each_dio_ctx(struct dio_ctx *ct)
+static int deinit_each_dio_ctx(struct dio_ctx *ct)
 {
-	if (ct->buf)
-		free(ct->buf);
+	if (!ct->buf)
+		return 0;
+	if (ct->io_starttime.tv_sec != 0 || ct->io_starttime.tv_nsec != 0)
+		return 1;
+	free(ct->buf);
+	return 0;
 }
 
 static int setup_directio_ctx(struct io_err_stat_path *p)
@@ -164,6 +168,7 @@  fail_close:
 static void free_io_err_stat_path(struct io_err_stat_path *p)
 {
 	int i;
+	int inflight = 0;
 
 	if (!p)
 		return;
@@ -173,8 +178,13 @@  static void free_io_err_stat_path(struct io_err_stat_path *p)
 	cancel_inflight_io(p);
 
 	for (i = 0; i < CONCUR_NR_EVENT; i++)
-		deinit_each_dio_ctx(p->dio_ctx_array + i);
-	free(p->dio_ctx_array);
+		inflight += deinit_each_dio_ctx(p->dio_ctx_array + i);
+
+	if (!inflight)
+		free(p->dio_ctx_array);
+	else
+		io_err_stat_log(2, "%s: can't free aio space of %s, %d IOs in flight",
+				__func__, p->devname, inflight);
 
 	if (p->fd > 0)
 		close(p->fd);
@@ -503,7 +513,7 @@  static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
 	int		rc = PATH_UNCHECKED;
 	int		r;
 
-	if (ct->io_starttime.tv_sec == 0)
+	if (ct->io_starttime.tv_sec == 0 && ct->io_starttime.tv_nsec == 0)
 		return rc;
 	timespecsub(t, &ct->io_starttime, &difftime);
 	if (difftime.tv_sec > IOTIMEOUT_SEC) {
@@ -514,8 +524,6 @@  static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t,
 		if (r)
 			io_err_stat_log(5, "%s: io_cancel error %i",
 					dev, errno);
-		ct->io_starttime.tv_sec = 0;
-		ct->io_starttime.tv_nsec = 0;
 		rc = PATH_TIMEOUT;
 	} else {
 		rc = PATH_PENDING;
@@ -559,8 +567,6 @@  static void cancel_inflight_io(struct io_err_stat_path *pp)
 		if (r)
 			io_err_stat_log(5, "%s: io_cancel error %d, %i",
 					pp->devname, r, errno);
-		ct->io_starttime.tv_sec = 0;
-		ct->io_starttime.tv_nsec = 0;
 	}
 }