diff mbox series

xfs: make xfs_log_iovec independent from xfs_log_vec and release it early

Message ID 20240623123119.3562031-1-alexjlzheng@tencent.com (mailing list archive)
State New
Headers show
Series xfs: make xfs_log_iovec independent from xfs_log_vec and release it early | expand

Commit Message

Jinliang Zheng June 23, 2024, 12:31 p.m. UTC
From: Jinliang Zheng <alexjlzheng@tencent.com>

In the current implementation, in most cases, the memory of xfs_log_vec
and xfs_log_iovec is allocated together. Therefore the life cycle of
xfs_log_iovec has to remain the same as xfs_log_vec.

But this is not necessary. When the content in xfs_log_iovec is written
to iclog by xlog_write(), it no longer needs to exist in the memory. But
xfs_log_vec is still useful, because after we flush the iclog into the
disk log space, we need to find the corresponding xfs_log_item through
the xfs_log_vec->lv_item field and add it to AIL.

This patch separates the memory allocation of xfs_log_iovec from
xfs_log_vec, and releases the memory of xfs_log_iovec in advance after
the content in xfs_log_iovec is written to iclog.

Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
---
 fs/xfs/xfs_log.c     |  2 ++
 fs/xfs/xfs_log.h     |  8 ++++++--
 fs/xfs/xfs_log_cil.c | 26 ++++++++++++++++----------
 3 files changed, 24 insertions(+), 12 deletions(-)

Comments

Darrick J. Wong June 24, 2024, 3:25 p.m. UTC | #1
On Sun, Jun 23, 2024 at 08:31:19PM +0800, alexjlzheng@gmail.com wrote:
> From: Jinliang Zheng <alexjlzheng@tencent.com>
> 
> In the current implementation, in most cases, the memory of xfs_log_vec
> and xfs_log_iovec is allocated together. Therefore the life cycle of
> xfs_log_iovec has to remain the same as xfs_log_vec.
> 
> But this is not necessary. When the content in xfs_log_iovec is written
> to iclog by xlog_write(), it no longer needs to exist in the memory. But
> xfs_log_vec is still useful, because after we flush the iclog into the
> disk log space, we need to find the corresponding xfs_log_item through
> the xfs_log_vec->lv_item field and add it to AIL.
> 
> This patch separates the memory allocation of xfs_log_iovec from
> xfs_log_vec, and releases the memory of xfs_log_iovec in advance after
> the content in xfs_log_iovec is written to iclog.

Why would anyone care?  This makes lifecycle reasoning more complicated
but no justification is provided.

--D

> Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
> ---
>  fs/xfs/xfs_log.c     |  2 ++
>  fs/xfs/xfs_log.h     |  8 ++++++--
>  fs/xfs/xfs_log_cil.c | 26 ++++++++++++++++----------
>  3 files changed, 24 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 416c15494983..f7af9550c17b 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2526,6 +2526,8 @@ xlog_write(
>  			xlog_write_full(lv, ticket, iclog, &log_offset,
>  					 &len, &record_cnt, &data_cnt);
>  		}
> +		if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC)
> +			kvfree(lv->lv_iovecp);
>  	}
>  	ASSERT(len == 0);
>  
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index d69acf881153..f052c7fdb3e9 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -6,6 +6,8 @@
>  #ifndef	__XFS_LOG_H__
>  #define __XFS_LOG_H__
>  
> +#define XFS_LOG_VEC_DYNAMIC	(1 << 0)
> +
>  struct xfs_cil_ctx;
>  
>  struct xfs_log_vec {
> @@ -17,7 +19,8 @@ struct xfs_log_vec {
>  	char			*lv_buf;	/* formatted buffer */
>  	int			lv_bytes;	/* accounted space in buffer */
>  	int			lv_buf_len;	/* aligned size of buffer */
> -	int			lv_size;	/* size of allocated lv */
> +	int			lv_size;	/* size of allocated iovec + buffer */
> +	int			lv_flags;	/* lv flags */
>  };
>  
>  #define XFS_LOG_VEC_ORDERED	(-1)
> @@ -40,6 +43,7 @@ static inline void
>  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
>  		int data_len)
>  {
> +	struct xfs_log_iovec	*lvec = lv->lv_iovecp;
>  	struct xlog_op_header	*oph = vec->i_addr;
>  	int			len;
>  
> @@ -69,7 +73,7 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
>  	vec->i_len = len;
>  
>  	/* Catch buffer overruns */
> -	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
> +	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size);
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index f51cbc6405c1..3be9f86ce655 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -219,8 +219,7 @@ static inline int
>  xlog_cil_iovec_space(
>  	uint	niovecs)
>  {
> -	return round_up((sizeof(struct xfs_log_vec) +
> -					niovecs * sizeof(struct xfs_log_iovec)),
> +	return round_up(niovecs * sizeof(struct xfs_log_iovec),
>  			sizeof(uint64_t));
>  }
>  
> @@ -279,6 +278,7 @@ xlog_cil_alloc_shadow_bufs(
>  
>  	list_for_each_entry(lip, &tp->t_items, li_trans) {
>  		struct xfs_log_vec *lv;
> +		struct xfs_log_iovec *lvec;
>  		int	niovecs = 0;
>  		int	nbytes = 0;
>  		int	buf_size;
> @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs(
>  			 * the buffer, only the log vector header and the iovec
>  			 * storage.
>  			 */
> -			kvfree(lip->li_lv_shadow);
> -			lv = xlog_kvmalloc(buf_size);
> -
> -			memset(lv, 0, xlog_cil_iovec_space(niovecs));
> +			if (lip->li_lv_shadow) {
> +				kvfree(lip->li_lv_shadow->lv_iovecp);
> +				kvfree(lip->li_lv_shadow);
> +			}
> +			lv = xlog_kvmalloc(sizeof(struct xfs_log_vec));
> +			memset(lv, 0, sizeof(struct xfs_log_vec));
> +			lvec = xlog_kvmalloc(buf_size);
> +			memset(lvec, 0, xlog_cil_iovec_space(niovecs));
>  
> +			lv->lv_flags |= XFS_LOG_VEC_DYNAMIC;
>  			INIT_LIST_HEAD(&lv->lv_list);
>  			lv->lv_item = lip;
>  			lv->lv_size = buf_size;
>  			if (ordered)
>  				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
>  			else
> -				lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
> +				lv->lv_iovecp = lvec;
>  			lip->li_lv_shadow = lv;
>  		} else {
>  			/* same or smaller, optimise common overwrite case */
> @@ -366,9 +371,9 @@ xlog_cil_alloc_shadow_bufs(
>  		lv->lv_niovecs = niovecs;
>  
>  		/* The allocated data region lies beyond the iovec region */
> -		lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs);
> +		lv->lv_buf = (char *)lv->lv_iovecp +
> +				xlog_cil_iovec_space(niovecs);
>  	}
> -
>  }
>  
>  /*
> @@ -502,7 +507,7 @@ xlog_cil_insert_format_items(
>  			/* reset the lv buffer information for new formatting */
>  			lv->lv_buf_len = 0;
>  			lv->lv_bytes = 0;
> -			lv->lv_buf = (char *)lv +
> +			lv->lv_buf = (char *)lv->lv_iovecp +
>  					xlog_cil_iovec_space(lv->lv_niovecs);
>  		} else {
>  			/* switch to shadow buffer! */
> @@ -1544,6 +1549,7 @@ xlog_cil_process_intents(
>  		set_bit(XFS_LI_WHITEOUT, &ilip->li_flags);
>  		trace_xfs_cil_whiteout_mark(ilip);
>  		len += ilip->li_lv->lv_bytes;
> +		kvfree(ilip->li_lv->lv_iovecp);
>  		kvfree(ilip->li_lv);
>  		ilip->li_lv = NULL;
>  
> -- 
> 2.39.3
> 
>
Jinliang Zheng June 24, 2024, 4:06 p.m. UTC | #2
On Mon, 24 Jun 2024 08:25:29 -0700, djwong@kernel.org wrote:
> On Sun, Jun 23, 2024 at 08:31:19PM +0800, alexjlzheng@gmail.com wrote:
> > From: Jinliang Zheng <alexjlzheng@tencent.com>
> > 
> > In the current implementation, in most cases, the memory of xfs_log_vec
> > and xfs_log_iovec is allocated together. Therefore the life cycle of
> > xfs_log_iovec has to remain the same as xfs_log_vec.
> > 
> > But this is not necessary. When the content in xfs_log_iovec is written
> > to iclog by xlog_write(), it no longer needs to exist in the memory. But
> > xfs_log_vec is still useful, because after we flush the iclog into the
> > disk log space, we need to find the corresponding xfs_log_item through
> > the xfs_log_vec->lv_item field and add it to AIL.
> > 
> > This patch separates the memory allocation of xfs_log_iovec from
> > xfs_log_vec, and releases the memory of xfs_log_iovec in advance after
> > the content in xfs_log_iovec is written to iclog.
> 
> Why would anyone care?  This makes lifecycle reasoning more complicated
> but no justification is provided.

xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself,
xfs_log_iovec occupies a larger memory space.

When their memory spaces are allocated together, the memory occupied by
xfs_log_iovec can only be released after iclog is written to the disk log
space. But when xfs_log_iovec is written to iclog, its existence becomes
meaningless, because a copy of its content is already saved in iclog at this
time.

And if they are separated, we can release its memory when the data in
xfs_log_iovec is written to iclog. The interval between these two time points
is not too small.

Since xfs_log_iovec is the area that currently uses the most memory in
xfs_log_vec, this means that we have released quite a lot of memory. Freeing
memory that occupies a larger size earlier means smaller memory usage.

Jinliang Zheng

> 
> --D
> 
> > Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
> > ---
> >  fs/xfs/xfs_log.c     |  2 ++
> >  fs/xfs/xfs_log.h     |  8 ++++++--
> >  fs/xfs/xfs_log_cil.c | 26 ++++++++++++++++----------
> >  3 files changed, 24 insertions(+), 12 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index 416c15494983..f7af9550c17b 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -2526,6 +2526,8 @@ xlog_write(
> >  			xlog_write_full(lv, ticket, iclog, &log_offset,
> >  					 &len, &record_cnt, &data_cnt);
> >  		}
> > +		if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC)
> > +			kvfree(lv->lv_iovecp);
> >  	}
> >  	ASSERT(len == 0);
> >  
> > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> > index d69acf881153..f052c7fdb3e9 100644
> > --- a/fs/xfs/xfs_log.h
> > +++ b/fs/xfs/xfs_log.h
> > @@ -6,6 +6,8 @@
> >  #ifndef	__XFS_LOG_H__
> >  #define __XFS_LOG_H__
> >  
> > +#define XFS_LOG_VEC_DYNAMIC	(1 << 0)
> > +
> >  struct xfs_cil_ctx;
> >  
> >  struct xfs_log_vec {
> > @@ -17,7 +19,8 @@ struct xfs_log_vec {
> >  	char			*lv_buf;	/* formatted buffer */
> >  	int			lv_bytes;	/* accounted space in buffer */
> >  	int			lv_buf_len;	/* aligned size of buffer */
> > -	int			lv_size;	/* size of allocated lv */
> > +	int			lv_size;	/* size of allocated iovec + buffer */
> > +	int			lv_flags;	/* lv flags */
> >  };
> >  
> >  #define XFS_LOG_VEC_ORDERED	(-1)
> > @@ -40,6 +43,7 @@ static inline void
> >  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
> >  		int data_len)
> >  {
> > +	struct xfs_log_iovec	*lvec = lv->lv_iovecp;
> >  	struct xlog_op_header	*oph = vec->i_addr;
> >  	int			len;
> >  
> > @@ -69,7 +73,7 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
> >  	vec->i_len = len;
> >  
> >  	/* Catch buffer overruns */
> > -	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
> > +	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size);
> >  }
> >  
> >  /*
> > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> > index f51cbc6405c1..3be9f86ce655 100644
> > --- a/fs/xfs/xfs_log_cil.c
> > +++ b/fs/xfs/xfs_log_cil.c
> > @@ -219,8 +219,7 @@ static inline int
> >  xlog_cil_iovec_space(
> >  	uint	niovecs)
> >  {
> > -	return round_up((sizeof(struct xfs_log_vec) +
> > -					niovecs * sizeof(struct xfs_log_iovec)),
> > +	return round_up(niovecs * sizeof(struct xfs_log_iovec),
> >  			sizeof(uint64_t));
> >  }
> >  
> > @@ -279,6 +278,7 @@ xlog_cil_alloc_shadow_bufs(
> >  
> >  	list_for_each_entry(lip, &tp->t_items, li_trans) {
> >  		struct xfs_log_vec *lv;
> > +		struct xfs_log_iovec *lvec;
> >  		int	niovecs = 0;
> >  		int	nbytes = 0;
> >  		int	buf_size;
> > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs(
> >  			 * the buffer, only the log vector header and the iovec
> >  			 * storage.
> >  			 */
> > -			kvfree(lip->li_lv_shadow);
> > -			lv = xlog_kvmalloc(buf_size);
> > -
> > -			memset(lv, 0, xlog_cil_iovec_space(niovecs));
> > +			if (lip->li_lv_shadow) {
> > +				kvfree(lip->li_lv_shadow->lv_iovecp);
> > +				kvfree(lip->li_lv_shadow);
> > +			}
> > +			lv = xlog_kvmalloc(sizeof(struct xfs_log_vec));
> > +			memset(lv, 0, sizeof(struct xfs_log_vec));
> > +			lvec = xlog_kvmalloc(buf_size);
> > +			memset(lvec, 0, xlog_cil_iovec_space(niovecs));
> >  
> > +			lv->lv_flags |= XFS_LOG_VEC_DYNAMIC;
> >  			INIT_LIST_HEAD(&lv->lv_list);
> >  			lv->lv_item = lip;
> >  			lv->lv_size = buf_size;
> >  			if (ordered)
> >  				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
> >  			else
> > -				lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
> > +				lv->lv_iovecp = lvec;
> >  			lip->li_lv_shadow = lv;
> >  		} else {
> >  			/* same or smaller, optimise common overwrite case */
> > @@ -366,9 +371,9 @@ xlog_cil_alloc_shadow_bufs(
> >  		lv->lv_niovecs = niovecs;
> >  
> >  		/* The allocated data region lies beyond the iovec region */
> > -		lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs);
> > +		lv->lv_buf = (char *)lv->lv_iovecp +
> > +				xlog_cil_iovec_space(niovecs);
> >  	}
> > -
> >  }
> >  
> >  /*
> > @@ -502,7 +507,7 @@ xlog_cil_insert_format_items(
> >  			/* reset the lv buffer information for new formatting */
> >  			lv->lv_buf_len = 0;
> >  			lv->lv_bytes = 0;
> > -			lv->lv_buf = (char *)lv +
> > +			lv->lv_buf = (char *)lv->lv_iovecp +
> >  					xlog_cil_iovec_space(lv->lv_niovecs);
> >  		} else {
> >  			/* switch to shadow buffer! */
> > @@ -1544,6 +1549,7 @@ xlog_cil_process_intents(
> >  		set_bit(XFS_LI_WHITEOUT, &ilip->li_flags);
> >  		trace_xfs_cil_whiteout_mark(ilip);
> >  		len += ilip->li_lv->lv_bytes;
> > +		kvfree(ilip->li_lv->lv_iovecp);
> >  		kvfree(ilip->li_lv);
> >  		ilip->li_lv = NULL;
> >  
> > -- 
> > 2.39.3
> > 
> >
Christoph Hellwig June 25, 2024, 11:14 a.m. UTC | #3
On Tue, Jun 25, 2024 at 12:06:14AM +0800, Jinliang Zheng wrote:
> xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself,
> xfs_log_iovec occupies a larger memory space.
> 
> When their memory spaces are allocated together, the memory occupied by
> xfs_log_iovec can only be released after iclog is written to the disk log
> space. But when xfs_log_iovec is written to iclog, its existence becomes
> meaningless, because a copy of its content is already saved in iclog at this
> time.
> 
> And if they are separated, we can release its memory when the data in
> xfs_log_iovec is written to iclog. The interval between these two time points
> is not too small.
> 
> Since xfs_log_iovec is the area that currently uses the most memory in
> xfs_log_vec, this means that we have released quite a lot of memory. Freeing
> memory that occupies a larger size earlier means smaller memory usage.

This all needs to go into the commit log.  Preferably including the
actual quantity of memory saved for a useful workload.
Jinliang Zheng June 25, 2024, 11:24 a.m. UTC | #4
On Tue, 25 Jun 2024 04:14:55 -0700, Christoph Hellwig wrote:
> On Tue, Jun 25, 2024 at 12:06:14AM +0800, Jinliang Zheng wrote:
> > xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself,
> > xfs_log_iovec occupies a larger memory space.
> > 
> > When their memory spaces are allocated together, the memory occupied by
> > xfs_log_iovec can only be released after iclog is written to the disk log
> > space. But when xfs_log_iovec is written to iclog, its existence becomes
> > meaningless, because a copy of its content is already saved in iclog at this
> > time.
> > 
> > And if they are separated, we can release its memory when the data in
> > xfs_log_iovec is written to iclog. The interval between these two time points
> > is not too small.
> > 
> > Since xfs_log_iovec is the area that currently uses the most memory in
> > xfs_log_vec, this means that we have released quite a lot of memory. Freeing
> > memory that occupies a larger size earlier means smaller memory usage.
> 
> This all needs to go into the commit log.  Preferably including the
> actual quantity of memory saved for a useful workload.

I am sorry, but I didn't get your point. May I ask if you could clarify your
viewpoint more clearly?

Thank you very much. :)
Jinliang Zheng
Christoph Hellwig June 25, 2024, 11:33 a.m. UTC | #5
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2526,6 +2526,8 @@ xlog_write(
>  			xlog_write_full(lv, ticket, iclog, &log_offset,
>  					 &len, &record_cnt, &data_cnt);
>  		}
> +		if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC)
> +			kvfree(lv->lv_iovecp);

This should porbably be a function paramter to xlog_write, with
xlog_cil_write_chain asking for the iovecs to be freed because they
are dynamically allocated, and the other two not becaue the iovecs
are on-stack.  With that we don't need to grow a new field in
struct xfs_log_vec.

>  	list_for_each_entry(lip, &tp->t_items, li_trans) {
>  		struct xfs_log_vec *lv;
> +		struct xfs_log_iovec *lvec;
>  		int	niovecs = 0;
>  		int	nbytes = 0;
>  		int	buf_size;
> @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs(
>  			 * the buffer, only the log vector header and the iovec
>  			 * storage.
>  			 */
> -			kvfree(lip->li_lv_shadow);
> -			lv = xlog_kvmalloc(buf_size);
> -
> -			memset(lv, 0, xlog_cil_iovec_space(niovecs));
> +			if (lip->li_lv_shadow) {
> +				kvfree(lip->li_lv_shadow->lv_iovecp);
> +				kvfree(lip->li_lv_shadow);
> +			}
> +			lv = xlog_kvmalloc(sizeof(struct xfs_log_vec));
> +			memset(lv, 0, sizeof(struct xfs_log_vec));
> +			lvec = xlog_kvmalloc(buf_size);
> +			memset(lvec, 0, xlog_cil_iovec_space(niovecs));

This area can use quite a bit of a redo.  The xfs_log_vec is tiny,
so it doesn't really need a vmalloc fallback but can simply use
kmalloc.

But more importantly there is no need to really it, you just
need to allocate it.  So this should probably become:

	lv = lip->li_lv_shadow;
	if (!lv) {
		/* kmalloc and initialize, set lv_size to zero */
	}

	if (buf_size > lv->lv_size) {
		/* grow case that rallocates ->lv_iovecp */
	} else {
		/* same or smaller, optimise common overwrite case */
		..
	}
Christoph Hellwig June 25, 2024, 11:34 a.m. UTC | #6
On Tue, Jun 25, 2024 at 07:24:38PM +0800, alexjlzheng@gmail.com wrote:
> I am sorry, but I didn't get your point. May I ask if you could clarify your
> viewpoint more clearly?

The explanation you gave in reply to Darrick should be covered in
the commit log for the patch.  The commit log should also contain
measurement of how much memory this saves and explain why this is a
good trafeoff vs the extra memory allocation.
kernel test robot June 25, 2024, 9:56 p.m. UTC | #7
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on xfs-linux/for-next]
[also build test WARNING on linus/master v6.10-rc5 next-20240625]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/alexjlzheng-gmail-com/xfs-make-xfs_log_iovec-independent-from-xfs_log_vec-and-release-it-early/20240625-192710
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
patch link:    https://lore.kernel.org/r/20240623123119.3562031-1-alexjlzheng%40tencent.com
patch subject: [PATCH] xfs: make xfs_log_iovec independent from xfs_log_vec and release it early
config: i386-buildonly-randconfig-004-20240626 (https://download.01.org/0day-ci/archive/20240626/202406260523.tGxY7QOx-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240626/202406260523.tGxY7QOx-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406260523.tGxY7QOx-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from fs/xfs/libxfs/xfs_da_btree.c:24:
   fs/xfs/xfs_log.h: In function 'xlog_finish_iovec':
>> fs/xfs/xfs_log.h:46:34: warning: unused variable 'lvec' [-Wunused-variable]
      46 |         struct xfs_log_iovec    *lvec = lv->lv_iovecp;
         |                                  ^~~~


vim +/lvec +46 fs/xfs/xfs_log.h

    38	
    39	void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
    40			uint type);
    41	
    42	static inline void
    43	xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
    44			int data_len)
    45	{
  > 46		struct xfs_log_iovec	*lvec = lv->lv_iovecp;
    47		struct xlog_op_header	*oph = vec->i_addr;
    48		int			len;
    49	
    50		/*
    51		 * Always round up the length to the correct alignment so callers don't
    52		 * need to know anything about this log vec layout requirement. This
    53		 * means we have to zero the area the data to be written does not cover.
    54		 * This is complicated by fact the payload region is offset into the
    55		 * logvec region by the opheader that tracks the payload.
    56		 */
    57		len = xlog_calc_iovec_len(data_len);
    58		if (len - data_len != 0) {
    59			char	*buf = vec->i_addr + sizeof(struct xlog_op_header);
    60	
    61			memset(buf + data_len, 0, len - data_len);
    62		}
    63	
    64		/*
    65		 * The opheader tracks aligned payload length, whilst the logvec tracks
    66		 * the overall region length.
    67		 */
    68		oph->oh_len = cpu_to_be32(len);
    69	
    70		len += sizeof(struct xlog_op_header);
    71		lv->lv_buf_len += len;
    72		lv->lv_bytes += len;
    73		vec->i_len = len;
    74	
    75		/* Catch buffer overruns */
    76		ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size);
    77	}
    78
Jinliang Zheng June 26, 2024, 5:07 a.m. UTC | #8
On Tue, 25 Jun 2024 04:33:00 -0700, Christoph Hellwig wrote:
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -2526,6 +2526,8 @@ xlog_write(
> >  			xlog_write_full(lv, ticket, iclog, &log_offset,
> >  					 &len, &record_cnt, &data_cnt);
> >  		}
> > +		if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC)
> > +			kvfree(lv->lv_iovecp);
> 
> This should porbably be a function paramter to xlog_write, with
> xlog_cil_write_chain asking for the iovecs to be freed because they
> are dynamically allocated, and the other two not becaue the iovecs
> are on-stack.  With that we don't need to grow a new field in
> struct xfs_log_vec.

xlog_write() will write all xfs_log_iovec on the lv chain linked list to iclog.
We seem to have no way to distinguish whether the xfs_log_iovec on the lv_chain
list is on the stack by adding new parameters to xlog_write().

> 
> >  	list_for_each_entry(lip, &tp->t_items, li_trans) {
> >  		struct xfs_log_vec *lv;
> > +		struct xfs_log_iovec *lvec;
> >  		int	niovecs = 0;
> >  		int	nbytes = 0;
> >  		int	buf_size;
> > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs(
> >  			 * the buffer, only the log vector header and the iovec
> >  			 * storage.
> >  			 */
> > -			kvfree(lip->li_lv_shadow);
> > -			lv = xlog_kvmalloc(buf_size);
> > -
> > -			memset(lv, 0, xlog_cil_iovec_space(niovecs));
> > +			if (lip->li_lv_shadow) {
> > +				kvfree(lip->li_lv_shadow->lv_iovecp);
> > +				kvfree(lip->li_lv_shadow);
> > +			}
> > +			lv = xlog_kvmalloc(sizeof(struct xfs_log_vec));
> > +			memset(lv, 0, sizeof(struct xfs_log_vec));
> > +			lvec = xlog_kvmalloc(buf_size);
> > +			memset(lvec, 0, xlog_cil_iovec_space(niovecs));
> 
> This area can use quite a bit of a redo.  The xfs_log_vec is tiny,
> so it doesn't really need a vmalloc fallback but can simply use
> kmalloc.
> 
> But more importantly there is no need to really it, you just
> need to allocate it.  So this should probably become:
> 
> 	lv = lip->li_lv_shadow;
> 	if (!lv) {
> 		/* kmalloc and initialize, set lv_size to zero */
> 	}
> 
> 	if (buf_size > lv->lv_size) {
> 		/* grow case that rallocates ->lv_iovecp */
> 	} else {
> 		/* same or smaller, optimise common overwrite case */
> 		..
> 	}

If we take the memory allocation of xfs_log_vec out of the if branch below, we
have to face the corner case of buf_size = 0.

But the release and reallocation of xfs_log_vec in this patch is indeed
redundant. I've optimized it in [PATCH v2] and [PATCH v3].

Link to [PATCH v3]:
- https://lore.kernel.org/linux-xfs/20240626044909.15060-1-alexjlzheng@tencent.com/T/#t

Thank you for your suggestion. :)
Jinliang Zheng
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 416c15494983..f7af9550c17b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2526,6 +2526,8 @@  xlog_write(
 			xlog_write_full(lv, ticket, iclog, &log_offset,
 					 &len, &record_cnt, &data_cnt);
 		}
+		if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC)
+			kvfree(lv->lv_iovecp);
 	}
 	ASSERT(len == 0);
 
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d69acf881153..f052c7fdb3e9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,6 +6,8 @@ 
 #ifndef	__XFS_LOG_H__
 #define __XFS_LOG_H__
 
+#define XFS_LOG_VEC_DYNAMIC	(1 << 0)
+
 struct xfs_cil_ctx;
 
 struct xfs_log_vec {
@@ -17,7 +19,8 @@  struct xfs_log_vec {
 	char			*lv_buf;	/* formatted buffer */
 	int			lv_bytes;	/* accounted space in buffer */
 	int			lv_buf_len;	/* aligned size of buffer */
-	int			lv_size;	/* size of allocated lv */
+	int			lv_size;	/* size of allocated iovec + buffer */
+	int			lv_flags;	/* lv flags */
 };
 
 #define XFS_LOG_VEC_ORDERED	(-1)
@@ -40,6 +43,7 @@  static inline void
 xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
 		int data_len)
 {
+	struct xfs_log_iovec	*lvec = lv->lv_iovecp;
 	struct xlog_op_header	*oph = vec->i_addr;
 	int			len;
 
@@ -69,7 +73,7 @@  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
 	vec->i_len = len;
 
 	/* Catch buffer overruns */
-	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
+	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size);
 }
 
 /*
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f51cbc6405c1..3be9f86ce655 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -219,8 +219,7 @@  static inline int
 xlog_cil_iovec_space(
 	uint	niovecs)
 {
-	return round_up((sizeof(struct xfs_log_vec) +
-					niovecs * sizeof(struct xfs_log_iovec)),
+	return round_up(niovecs * sizeof(struct xfs_log_iovec),
 			sizeof(uint64_t));
 }
 
@@ -279,6 +278,7 @@  xlog_cil_alloc_shadow_bufs(
 
 	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		struct xfs_log_vec *lv;
+		struct xfs_log_iovec *lvec;
 		int	niovecs = 0;
 		int	nbytes = 0;
 		int	buf_size;
@@ -339,18 +339,23 @@  xlog_cil_alloc_shadow_bufs(
 			 * the buffer, only the log vector header and the iovec
 			 * storage.
 			 */
-			kvfree(lip->li_lv_shadow);
-			lv = xlog_kvmalloc(buf_size);
-
-			memset(lv, 0, xlog_cil_iovec_space(niovecs));
+			if (lip->li_lv_shadow) {
+				kvfree(lip->li_lv_shadow->lv_iovecp);
+				kvfree(lip->li_lv_shadow);
+			}
+			lv = xlog_kvmalloc(sizeof(struct xfs_log_vec));
+			memset(lv, 0, sizeof(struct xfs_log_vec));
+			lvec = xlog_kvmalloc(buf_size);
+			memset(lvec, 0, xlog_cil_iovec_space(niovecs));
 
+			lv->lv_flags |= XFS_LOG_VEC_DYNAMIC;
 			INIT_LIST_HEAD(&lv->lv_list);
 			lv->lv_item = lip;
 			lv->lv_size = buf_size;
 			if (ordered)
 				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
 			else
-				lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
+				lv->lv_iovecp = lvec;
 			lip->li_lv_shadow = lv;
 		} else {
 			/* same or smaller, optimise common overwrite case */
@@ -366,9 +371,9 @@  xlog_cil_alloc_shadow_bufs(
 		lv->lv_niovecs = niovecs;
 
 		/* The allocated data region lies beyond the iovec region */
-		lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs);
+		lv->lv_buf = (char *)lv->lv_iovecp +
+				xlog_cil_iovec_space(niovecs);
 	}
-
 }
 
 /*
@@ -502,7 +507,7 @@  xlog_cil_insert_format_items(
 			/* reset the lv buffer information for new formatting */
 			lv->lv_buf_len = 0;
 			lv->lv_bytes = 0;
-			lv->lv_buf = (char *)lv +
+			lv->lv_buf = (char *)lv->lv_iovecp +
 					xlog_cil_iovec_space(lv->lv_niovecs);
 		} else {
 			/* switch to shadow buffer! */
@@ -1544,6 +1549,7 @@  xlog_cil_process_intents(
 		set_bit(XFS_LI_WHITEOUT, &ilip->li_flags);
 		trace_xfs_cil_whiteout_mark(ilip);
 		len += ilip->li_lv->lv_bytes;
+		kvfree(ilip->li_lv->lv_iovecp);
 		kvfree(ilip->li_lv);
 		ilip->li_lv = NULL;