Message ID | 20240623123119.3562031-1-alexjlzheng@tencent.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | xfs: make xfs_log_iovec independent from xfs_log_vec and release it early | expand |
On Sun, Jun 23, 2024 at 08:31:19PM +0800, alexjlzheng@gmail.com wrote: > From: Jinliang Zheng <alexjlzheng@tencent.com> > > In the current implementation, in most cases, the memory of xfs_log_vec > and xfs_log_iovec is allocated together. Therefore the life cycle of > xfs_log_iovec has to remain the same as xfs_log_vec. > > But this is not necessary. When the content in xfs_log_iovec is written > to iclog by xlog_write(), it no longer needs to exist in the memory. But > xfs_log_vec is still useful, because after we flush the iclog into the > disk log space, we need to find the corresponding xfs_log_item through > the xfs_log_vec->lv_item field and add it to AIL. > > This patch separates the memory allocation of xfs_log_iovec from > xfs_log_vec, and releases the memory of xfs_log_iovec in advance after > the content in xfs_log_iovec is written to iclog. Why would anyone care? This makes lifecycle reasoning more complicated but no justification is provided. --D > Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com> > --- > fs/xfs/xfs_log.c | 2 ++ > fs/xfs/xfs_log.h | 8 ++++++-- > fs/xfs/xfs_log_cil.c | 26 ++++++++++++++++---------- > 3 files changed, 24 insertions(+), 12 deletions(-) > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > index 416c15494983..f7af9550c17b 100644 > --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c > @@ -2526,6 +2526,8 @@ xlog_write( > xlog_write_full(lv, ticket, iclog, &log_offset, > &len, &record_cnt, &data_cnt); > } > + if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC) > + kvfree(lv->lv_iovecp); > } > ASSERT(len == 0); > > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h > index d69acf881153..f052c7fdb3e9 100644 > --- a/fs/xfs/xfs_log.h > +++ b/fs/xfs/xfs_log.h > @@ -6,6 +6,8 @@ > #ifndef __XFS_LOG_H__ > #define __XFS_LOG_H__ > > +#define XFS_LOG_VEC_DYNAMIC (1 << 0) > + > struct xfs_cil_ctx; > > struct xfs_log_vec { > @@ -17,7 +19,8 @@ struct xfs_log_vec { > char *lv_buf; /* formatted buffer */ > int lv_bytes; /* accounted space in buffer */ > int lv_buf_len; /* aligned size of buffer */ > - int lv_size; /* size of allocated lv */ > + int lv_size; /* size of allocated iovec + buffer */ > + int lv_flags; /* lv flags */ > }; > > #define XFS_LOG_VEC_ORDERED (-1) > @@ -40,6 +43,7 @@ static inline void > xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, > int data_len) > { > + struct xfs_log_iovec *lvec = lv->lv_iovecp; > struct xlog_op_header *oph = vec->i_addr; > int len; > > @@ -69,7 +73,7 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, > vec->i_len = len; > > /* Catch buffer overruns */ > - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size); > + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size); > } > > /* > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > index f51cbc6405c1..3be9f86ce655 100644 > --- a/fs/xfs/xfs_log_cil.c > +++ b/fs/xfs/xfs_log_cil.c > @@ -219,8 +219,7 @@ static inline int > xlog_cil_iovec_space( > uint niovecs) > { > - return round_up((sizeof(struct xfs_log_vec) + > - niovecs * sizeof(struct xfs_log_iovec)), > + return round_up(niovecs * sizeof(struct xfs_log_iovec), > sizeof(uint64_t)); > } > > @@ -279,6 +278,7 @@ xlog_cil_alloc_shadow_bufs( > > list_for_each_entry(lip, &tp->t_items, li_trans) { > struct xfs_log_vec *lv; > + struct xfs_log_iovec *lvec; > int niovecs = 0; > int nbytes = 0; > int buf_size; > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs( > * the buffer, only the log vector header and the iovec > * storage. > */ > - kvfree(lip->li_lv_shadow); > - lv = xlog_kvmalloc(buf_size); > - > - memset(lv, 0, xlog_cil_iovec_space(niovecs)); > + if (lip->li_lv_shadow) { > + kvfree(lip->li_lv_shadow->lv_iovecp); > + kvfree(lip->li_lv_shadow); > + } > + lv = xlog_kvmalloc(sizeof(struct xfs_log_vec)); > + memset(lv, 0, sizeof(struct xfs_log_vec)); > + lvec = xlog_kvmalloc(buf_size); > + memset(lvec, 0, xlog_cil_iovec_space(niovecs)); > > + lv->lv_flags |= XFS_LOG_VEC_DYNAMIC; > INIT_LIST_HEAD(&lv->lv_list); > lv->lv_item = lip; > lv->lv_size = buf_size; > if (ordered) > lv->lv_buf_len = XFS_LOG_VEC_ORDERED; > else > - lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; > + lv->lv_iovecp = lvec; > lip->li_lv_shadow = lv; > } else { > /* same or smaller, optimise common overwrite case */ > @@ -366,9 +371,9 @@ xlog_cil_alloc_shadow_bufs( > lv->lv_niovecs = niovecs; > > /* The allocated data region lies beyond the iovec region */ > - lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs); > + lv->lv_buf = (char *)lv->lv_iovecp + > + xlog_cil_iovec_space(niovecs); > } > - > } > > /* > @@ -502,7 +507,7 @@ xlog_cil_insert_format_items( > /* reset the lv buffer information for new formatting */ > lv->lv_buf_len = 0; > lv->lv_bytes = 0; > - lv->lv_buf = (char *)lv + > + lv->lv_buf = (char *)lv->lv_iovecp + > xlog_cil_iovec_space(lv->lv_niovecs); > } else { > /* switch to shadow buffer! */ > @@ -1544,6 +1549,7 @@ xlog_cil_process_intents( > set_bit(XFS_LI_WHITEOUT, &ilip->li_flags); > trace_xfs_cil_whiteout_mark(ilip); > len += ilip->li_lv->lv_bytes; > + kvfree(ilip->li_lv->lv_iovecp); > kvfree(ilip->li_lv); > ilip->li_lv = NULL; > > -- > 2.39.3 > >
On Mon, 24 Jun 2024 08:25:29 -0700, djwong@kernel.org wrote: > On Sun, Jun 23, 2024 at 08:31:19PM +0800, alexjlzheng@gmail.com wrote: > > From: Jinliang Zheng <alexjlzheng@tencent.com> > > > > In the current implementation, in most cases, the memory of xfs_log_vec > > and xfs_log_iovec is allocated together. Therefore the life cycle of > > xfs_log_iovec has to remain the same as xfs_log_vec. > > > > But this is not necessary. When the content in xfs_log_iovec is written > > to iclog by xlog_write(), it no longer needs to exist in the memory. But > > xfs_log_vec is still useful, because after we flush the iclog into the > > disk log space, we need to find the corresponding xfs_log_item through > > the xfs_log_vec->lv_item field and add it to AIL. > > > > This patch separates the memory allocation of xfs_log_iovec from > > xfs_log_vec, and releases the memory of xfs_log_iovec in advance after > > the content in xfs_log_iovec is written to iclog. > > Why would anyone care? This makes lifecycle reasoning more complicated > but no justification is provided. xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself, xfs_log_iovec occupies a larger memory space. When their memory spaces are allocated together, the memory occupied by xfs_log_iovec can only be released after iclog is written to the disk log space. But when xfs_log_iovec is written to iclog, its existence becomes meaningless, because a copy of its content is already saved in iclog at this time. And if they are separated, we can release its memory when the data in xfs_log_iovec is written to iclog. The interval between these two time points is not too small. Since xfs_log_iovec is the area that currently uses the most memory in xfs_log_vec, this means that we have released quite a lot of memory. Freeing memory that occupies a larger size earlier means smaller memory usage. Jinliang Zheng > > --D > > > Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com> > > --- > > fs/xfs/xfs_log.c | 2 ++ > > fs/xfs/xfs_log.h | 8 ++++++-- > > fs/xfs/xfs_log_cil.c | 26 ++++++++++++++++---------- > > 3 files changed, 24 insertions(+), 12 deletions(-) > > > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > > index 416c15494983..f7af9550c17b 100644 > > --- a/fs/xfs/xfs_log.c > > +++ b/fs/xfs/xfs_log.c > > @@ -2526,6 +2526,8 @@ xlog_write( > > xlog_write_full(lv, ticket, iclog, &log_offset, > > &len, &record_cnt, &data_cnt); > > } > > + if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC) > > + kvfree(lv->lv_iovecp); > > } > > ASSERT(len == 0); > > > > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h > > index d69acf881153..f052c7fdb3e9 100644 > > --- a/fs/xfs/xfs_log.h > > +++ b/fs/xfs/xfs_log.h > > @@ -6,6 +6,8 @@ > > #ifndef __XFS_LOG_H__ > > #define __XFS_LOG_H__ > > > > +#define XFS_LOG_VEC_DYNAMIC (1 << 0) > > + > > struct xfs_cil_ctx; > > > > struct xfs_log_vec { > > @@ -17,7 +19,8 @@ struct xfs_log_vec { > > char *lv_buf; /* formatted buffer */ > > int lv_bytes; /* accounted space in buffer */ > > int lv_buf_len; /* aligned size of buffer */ > > - int lv_size; /* size of allocated lv */ > > + int lv_size; /* size of allocated iovec + buffer */ > > + int lv_flags; /* lv flags */ > > }; > > > > #define XFS_LOG_VEC_ORDERED (-1) > > @@ -40,6 +43,7 @@ static inline void > > xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, > > int data_len) > > { > > + struct xfs_log_iovec *lvec = lv->lv_iovecp; > > struct xlog_op_header *oph = vec->i_addr; > > int len; > > > > @@ -69,7 +73,7 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, > > vec->i_len = len; > > > > /* Catch buffer overruns */ > > - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size); > > + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size); > > } > > > > /* > > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > > index f51cbc6405c1..3be9f86ce655 100644 > > --- a/fs/xfs/xfs_log_cil.c > > +++ b/fs/xfs/xfs_log_cil.c > > @@ -219,8 +219,7 @@ static inline int > > xlog_cil_iovec_space( > > uint niovecs) > > { > > - return round_up((sizeof(struct xfs_log_vec) + > > - niovecs * sizeof(struct xfs_log_iovec)), > > + return round_up(niovecs * sizeof(struct xfs_log_iovec), > > sizeof(uint64_t)); > > } > > > > @@ -279,6 +278,7 @@ xlog_cil_alloc_shadow_bufs( > > > > list_for_each_entry(lip, &tp->t_items, li_trans) { > > struct xfs_log_vec *lv; > > + struct xfs_log_iovec *lvec; > > int niovecs = 0; > > int nbytes = 0; > > int buf_size; > > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs( > > * the buffer, only the log vector header and the iovec > > * storage. > > */ > > - kvfree(lip->li_lv_shadow); > > - lv = xlog_kvmalloc(buf_size); > > - > > - memset(lv, 0, xlog_cil_iovec_space(niovecs)); > > + if (lip->li_lv_shadow) { > > + kvfree(lip->li_lv_shadow->lv_iovecp); > > + kvfree(lip->li_lv_shadow); > > + } > > + lv = xlog_kvmalloc(sizeof(struct xfs_log_vec)); > > + memset(lv, 0, sizeof(struct xfs_log_vec)); > > + lvec = xlog_kvmalloc(buf_size); > > + memset(lvec, 0, xlog_cil_iovec_space(niovecs)); > > > > + lv->lv_flags |= XFS_LOG_VEC_DYNAMIC; > > INIT_LIST_HEAD(&lv->lv_list); > > lv->lv_item = lip; > > lv->lv_size = buf_size; > > if (ordered) > > lv->lv_buf_len = XFS_LOG_VEC_ORDERED; > > else > > - lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; > > + lv->lv_iovecp = lvec; > > lip->li_lv_shadow = lv; > > } else { > > /* same or smaller, optimise common overwrite case */ > > @@ -366,9 +371,9 @@ xlog_cil_alloc_shadow_bufs( > > lv->lv_niovecs = niovecs; > > > > /* The allocated data region lies beyond the iovec region */ > > - lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs); > > + lv->lv_buf = (char *)lv->lv_iovecp + > > + xlog_cil_iovec_space(niovecs); > > } > > - > > } > > > > /* > > @@ -502,7 +507,7 @@ xlog_cil_insert_format_items( > > /* reset the lv buffer information for new formatting */ > > lv->lv_buf_len = 0; > > lv->lv_bytes = 0; > > - lv->lv_buf = (char *)lv + > > + lv->lv_buf = (char *)lv->lv_iovecp + > > xlog_cil_iovec_space(lv->lv_niovecs); > > } else { > > /* switch to shadow buffer! */ > > @@ -1544,6 +1549,7 @@ xlog_cil_process_intents( > > set_bit(XFS_LI_WHITEOUT, &ilip->li_flags); > > trace_xfs_cil_whiteout_mark(ilip); > > len += ilip->li_lv->lv_bytes; > > + kvfree(ilip->li_lv->lv_iovecp); > > kvfree(ilip->li_lv); > > ilip->li_lv = NULL; > > > > -- > > 2.39.3 > > > >
On Tue, Jun 25, 2024 at 12:06:14AM +0800, Jinliang Zheng wrote: > xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself, > xfs_log_iovec occupies a larger memory space. > > When their memory spaces are allocated together, the memory occupied by > xfs_log_iovec can only be released after iclog is written to the disk log > space. But when xfs_log_iovec is written to iclog, its existence becomes > meaningless, because a copy of its content is already saved in iclog at this > time. > > And if they are separated, we can release its memory when the data in > xfs_log_iovec is written to iclog. The interval between these two time points > is not too small. > > Since xfs_log_iovec is the area that currently uses the most memory in > xfs_log_vec, this means that we have released quite a lot of memory. Freeing > memory that occupies a larger size earlier means smaller memory usage. This all needs to go into the commit log. Preferably including the actual quantity of memory saved for a useful workload.
On Tue, 25 Jun 2024 04:14:55 -0700, Christoph Hellwig wrote: > On Tue, Jun 25, 2024 at 12:06:14AM +0800, Jinliang Zheng wrote: > > xfs_log_iovec is where all log data is saved. Compared to xfs_log_vec itself, > > xfs_log_iovec occupies a larger memory space. > > > > When their memory spaces are allocated together, the memory occupied by > > xfs_log_iovec can only be released after iclog is written to the disk log > > space. But when xfs_log_iovec is written to iclog, its existence becomes > > meaningless, because a copy of its content is already saved in iclog at this > > time. > > > > And if they are separated, we can release its memory when the data in > > xfs_log_iovec is written to iclog. The interval between these two time points > > is not too small. > > > > Since xfs_log_iovec is the area that currently uses the most memory in > > xfs_log_vec, this means that we have released quite a lot of memory. Freeing > > memory that occupies a larger size earlier means smaller memory usage. > > This all needs to go into the commit log. Preferably including the > actual quantity of memory saved for a useful workload. I am sorry, but I didn't get your point. May I ask if you could clarify your viewpoint more clearly? Thank you very much. :) Jinliang Zheng
> --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c > @@ -2526,6 +2526,8 @@ xlog_write( > xlog_write_full(lv, ticket, iclog, &log_offset, > &len, &record_cnt, &data_cnt); > } > + if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC) > + kvfree(lv->lv_iovecp); This should porbably be a function paramter to xlog_write, with xlog_cil_write_chain asking for the iovecs to be freed because they are dynamically allocated, and the other two not becaue the iovecs are on-stack. With that we don't need to grow a new field in struct xfs_log_vec. > list_for_each_entry(lip, &tp->t_items, li_trans) { > struct xfs_log_vec *lv; > + struct xfs_log_iovec *lvec; > int niovecs = 0; > int nbytes = 0; > int buf_size; > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs( > * the buffer, only the log vector header and the iovec > * storage. > */ > - kvfree(lip->li_lv_shadow); > - lv = xlog_kvmalloc(buf_size); > - > - memset(lv, 0, xlog_cil_iovec_space(niovecs)); > + if (lip->li_lv_shadow) { > + kvfree(lip->li_lv_shadow->lv_iovecp); > + kvfree(lip->li_lv_shadow); > + } > + lv = xlog_kvmalloc(sizeof(struct xfs_log_vec)); > + memset(lv, 0, sizeof(struct xfs_log_vec)); > + lvec = xlog_kvmalloc(buf_size); > + memset(lvec, 0, xlog_cil_iovec_space(niovecs)); This area can use quite a bit of a redo. The xfs_log_vec is tiny, so it doesn't really need a vmalloc fallback but can simply use kmalloc. But more importantly there is no need to really it, you just need to allocate it. So this should probably become: lv = lip->li_lv_shadow; if (!lv) { /* kmalloc and initialize, set lv_size to zero */ } if (buf_size > lv->lv_size) { /* grow case that rallocates ->lv_iovecp */ } else { /* same or smaller, optimise common overwrite case */ .. }
On Tue, Jun 25, 2024 at 07:24:38PM +0800, alexjlzheng@gmail.com wrote: > I am sorry, but I didn't get your point. May I ask if you could clarify your > viewpoint more clearly? The explanation you gave in reply to Darrick should be covered in the commit log for the patch. The commit log should also contain measurement of how much memory this saves and explain why this is a good trafeoff vs the extra memory allocation.
Hi, kernel test robot noticed the following build warnings: [auto build test WARNING on xfs-linux/for-next] [also build test WARNING on linus/master v6.10-rc5 next-20240625] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/alexjlzheng-gmail-com/xfs-make-xfs_log_iovec-independent-from-xfs_log_vec-and-release-it-early/20240625-192710 base: https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next patch link: https://lore.kernel.org/r/20240623123119.3562031-1-alexjlzheng%40tencent.com patch subject: [PATCH] xfs: make xfs_log_iovec independent from xfs_log_vec and release it early config: i386-buildonly-randconfig-004-20240626 (https://download.01.org/0day-ci/archive/20240626/202406260523.tGxY7QOx-lkp@intel.com/config) compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240626/202406260523.tGxY7QOx-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202406260523.tGxY7QOx-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from fs/xfs/libxfs/xfs_da_btree.c:24: fs/xfs/xfs_log.h: In function 'xlog_finish_iovec': >> fs/xfs/xfs_log.h:46:34: warning: unused variable 'lvec' [-Wunused-variable] 46 | struct xfs_log_iovec *lvec = lv->lv_iovecp; | ^~~~ vim +/lvec +46 fs/xfs/xfs_log.h 38 39 void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, 40 uint type); 41 42 static inline void 43 xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, 44 int data_len) 45 { > 46 struct xfs_log_iovec *lvec = lv->lv_iovecp; 47 struct xlog_op_header *oph = vec->i_addr; 48 int len; 49 50 /* 51 * Always round up the length to the correct alignment so callers don't 52 * need to know anything about this log vec layout requirement. This 53 * means we have to zero the area the data to be written does not cover. 54 * This is complicated by fact the payload region is offset into the 55 * logvec region by the opheader that tracks the payload. 56 */ 57 len = xlog_calc_iovec_len(data_len); 58 if (len - data_len != 0) { 59 char *buf = vec->i_addr + sizeof(struct xlog_op_header); 60 61 memset(buf + data_len, 0, len - data_len); 62 } 63 64 /* 65 * The opheader tracks aligned payload length, whilst the logvec tracks 66 * the overall region length. 67 */ 68 oph->oh_len = cpu_to_be32(len); 69 70 len += sizeof(struct xlog_op_header); 71 lv->lv_buf_len += len; 72 lv->lv_bytes += len; 73 vec->i_len = len; 74 75 /* Catch buffer overruns */ 76 ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size); 77 } 78
On Tue, 25 Jun 2024 04:33:00 -0700, Christoph Hellwig wrote: > > --- a/fs/xfs/xfs_log.c > > +++ b/fs/xfs/xfs_log.c > > @@ -2526,6 +2526,8 @@ xlog_write( > > xlog_write_full(lv, ticket, iclog, &log_offset, > > &len, &record_cnt, &data_cnt); > > } > > + if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC) > > + kvfree(lv->lv_iovecp); > > This should porbably be a function paramter to xlog_write, with > xlog_cil_write_chain asking for the iovecs to be freed because they > are dynamically allocated, and the other two not becaue the iovecs > are on-stack. With that we don't need to grow a new field in > struct xfs_log_vec. xlog_write() will write all xfs_log_iovec on the lv chain linked list to iclog. We seem to have no way to distinguish whether the xfs_log_iovec on the lv_chain list is on the stack by adding new parameters to xlog_write(). > > > list_for_each_entry(lip, &tp->t_items, li_trans) { > > struct xfs_log_vec *lv; > > + struct xfs_log_iovec *lvec; > > int niovecs = 0; > > int nbytes = 0; > > int buf_size; > > @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs( > > * the buffer, only the log vector header and the iovec > > * storage. > > */ > > - kvfree(lip->li_lv_shadow); > > - lv = xlog_kvmalloc(buf_size); > > - > > - memset(lv, 0, xlog_cil_iovec_space(niovecs)); > > + if (lip->li_lv_shadow) { > > + kvfree(lip->li_lv_shadow->lv_iovecp); > > + kvfree(lip->li_lv_shadow); > > + } > > + lv = xlog_kvmalloc(sizeof(struct xfs_log_vec)); > > + memset(lv, 0, sizeof(struct xfs_log_vec)); > > + lvec = xlog_kvmalloc(buf_size); > > + memset(lvec, 0, xlog_cil_iovec_space(niovecs)); > > This area can use quite a bit of a redo. The xfs_log_vec is tiny, > so it doesn't really need a vmalloc fallback but can simply use > kmalloc. > > But more importantly there is no need to really it, you just > need to allocate it. So this should probably become: > > lv = lip->li_lv_shadow; > if (!lv) { > /* kmalloc and initialize, set lv_size to zero */ > } > > if (buf_size > lv->lv_size) { > /* grow case that rallocates ->lv_iovecp */ > } else { > /* same or smaller, optimise common overwrite case */ > .. > } If we take the memory allocation of xfs_log_vec out of the if branch below, we have to face the corner case of buf_size = 0. But the release and reallocation of xfs_log_vec in this patch is indeed redundant. I've optimized it in [PATCH v2] and [PATCH v3]. Link to [PATCH v3]: - https://lore.kernel.org/linux-xfs/20240626044909.15060-1-alexjlzheng@tencent.com/T/#t Thank you for your suggestion. :) Jinliang Zheng
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 416c15494983..f7af9550c17b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2526,6 +2526,8 @@ xlog_write( xlog_write_full(lv, ticket, iclog, &log_offset, &len, &record_cnt, &data_cnt); } + if (lv->lv_flags & XFS_LOG_VEC_DYNAMIC) + kvfree(lv->lv_iovecp); } ASSERT(len == 0); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d69acf881153..f052c7fdb3e9 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -6,6 +6,8 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +#define XFS_LOG_VEC_DYNAMIC (1 << 0) + struct xfs_cil_ctx; struct xfs_log_vec { @@ -17,7 +19,8 @@ struct xfs_log_vec { char *lv_buf; /* formatted buffer */ int lv_bytes; /* accounted space in buffer */ int lv_buf_len; /* aligned size of buffer */ - int lv_size; /* size of allocated lv */ + int lv_size; /* size of allocated iovec + buffer */ + int lv_flags; /* lv flags */ }; #define XFS_LOG_VEC_ORDERED (-1) @@ -40,6 +43,7 @@ static inline void xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int data_len) { + struct xfs_log_iovec *lvec = lv->lv_iovecp; struct xlog_op_header *oph = vec->i_addr; int len; @@ -69,7 +73,7 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, vec->i_len = len; /* Catch buffer overruns */ - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size); + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lvec + lv->lv_size); } /* diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f51cbc6405c1..3be9f86ce655 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -219,8 +219,7 @@ static inline int xlog_cil_iovec_space( uint niovecs) { - return round_up((sizeof(struct xfs_log_vec) + - niovecs * sizeof(struct xfs_log_iovec)), + return round_up(niovecs * sizeof(struct xfs_log_iovec), sizeof(uint64_t)); } @@ -279,6 +278,7 @@ xlog_cil_alloc_shadow_bufs( list_for_each_entry(lip, &tp->t_items, li_trans) { struct xfs_log_vec *lv; + struct xfs_log_iovec *lvec; int niovecs = 0; int nbytes = 0; int buf_size; @@ -339,18 +339,23 @@ xlog_cil_alloc_shadow_bufs( * the buffer, only the log vector header and the iovec * storage. */ - kvfree(lip->li_lv_shadow); - lv = xlog_kvmalloc(buf_size); - - memset(lv, 0, xlog_cil_iovec_space(niovecs)); + if (lip->li_lv_shadow) { + kvfree(lip->li_lv_shadow->lv_iovecp); + kvfree(lip->li_lv_shadow); + } + lv = xlog_kvmalloc(sizeof(struct xfs_log_vec)); + memset(lv, 0, sizeof(struct xfs_log_vec)); + lvec = xlog_kvmalloc(buf_size); + memset(lvec, 0, xlog_cil_iovec_space(niovecs)); + lv->lv_flags |= XFS_LOG_VEC_DYNAMIC; INIT_LIST_HEAD(&lv->lv_list); lv->lv_item = lip; lv->lv_size = buf_size; if (ordered) lv->lv_buf_len = XFS_LOG_VEC_ORDERED; else - lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; + lv->lv_iovecp = lvec; lip->li_lv_shadow = lv; } else { /* same or smaller, optimise common overwrite case */ @@ -366,9 +371,9 @@ xlog_cil_alloc_shadow_bufs( lv->lv_niovecs = niovecs; /* The allocated data region lies beyond the iovec region */ - lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs); + lv->lv_buf = (char *)lv->lv_iovecp + + xlog_cil_iovec_space(niovecs); } - } /* @@ -502,7 +507,7 @@ xlog_cil_insert_format_items( /* reset the lv buffer information for new formatting */ lv->lv_buf_len = 0; lv->lv_bytes = 0; - lv->lv_buf = (char *)lv + + lv->lv_buf = (char *)lv->lv_iovecp + xlog_cil_iovec_space(lv->lv_niovecs); } else { /* switch to shadow buffer! */ @@ -1544,6 +1549,7 @@ xlog_cil_process_intents( set_bit(XFS_LI_WHITEOUT, &ilip->li_flags); trace_xfs_cil_whiteout_mark(ilip); len += ilip->li_lv->lv_bytes; + kvfree(ilip->li_lv->lv_iovecp); kvfree(ilip->li_lv); ilip->li_lv = NULL;