diff mbox series

[07/13] xfs: reserve space and initialise xlog_op_header in item formatting

Message ID 20210224063459.3436852-8-david@fromorbit.com (mailing list archive)
State Superseded
Headers show
Series xfs: rewrite xlog_write() | expand

Commit Message

Dave Chinner Feb. 24, 2021, 6:34 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Currently xlog_write() adds op headers to the log manually for every
log item region that is in the vector passed to it. While
xlog_write() needs to stamp the transaction ID into the ophdr, we
already know its length, flags, clientid, etc. at CIL commit time.

This means the only time that xlog_write() really needs to format and
reserve space for a new ophdr is when a region is split across two
iclogs. Adding the opheader and accounting for it as part of the
normal formatted item region means we simplify the accounting
of space used by a transaction and we don't have to special case
reserving of space for the ophdrs in xlog_write(). It also means
we can largely initialise the ophdr in transaction commit instead
of xlog_write(), making the xlog_write() formatting inner loop much
tighter.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log.c     | 59 ++++++++++++++++----------------------------
 fs/xfs/xfs_log.h     | 35 ++++++++++++++++++++++----
 fs/xfs/xfs_log_cil.c | 25 ++++++++++---------
 3 files changed, 65 insertions(+), 54 deletions(-)

Comments

Christoph Hellwig Feb. 25, 2021, 6:27 p.m. UTC | #1
> +			if (optype && index) {
> +				optype &= ~XLOG_START_TRANS;
> +			} else if (partial_copy) {
>                                  ophdr = xlog_write_setup_ophdr(ptr, ticket);

This line uses spaces for indentation, we should probably fix that
up somewhere in the series.

>  static inline void *
>  xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
>  		uint type)
>  {
> -	struct xfs_log_iovec *vec = *vecp;
> +	struct xfs_log_iovec	*vec = *vecp;
> +	struct xlog_op_header	*oph;
> +	uint32_t		len;
> +	void			*buf;
>  
>  	if (vec) {
>  		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
> @@ -44,21 +54,36 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
>  		vec = &lv->lv_iovecp[0];
>  	}
>  
> -	if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t)))
> -		lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t));
> +	len = lv->lv_buf_len + sizeof(struct xlog_op_header);
> +	if (!IS_ALIGNED(len, sizeof(uint64_t))) {
> +		lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
> +					sizeof(struct xlog_op_header);
> +	}
>  
>  	vec->i_type = type;
>  	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
>  
> -	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
> +	oph = vec->i_addr;
> +	oph->oh_clientid = XFS_TRANSACTION;
> +	oph->oh_res2 = 0;
> +	oph->oh_flags = 0;
> +
> +	buf = vec->i_addr + sizeof(struct xlog_op_header);
> +	ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
>  
>  	*vecp = vec;
> -	return vec->i_addr;
> +	return buf;
>  }

I think this function is growing a little too large to stay inlined.

> -		nbytes += niovecs * sizeof(uint64_t);
> +		nbytes += niovecs * (sizeof(uint64_t) +
> +					sizeof(struct xlog_op_header));;

Is it just me, or would

		nbytes += niovecs *
			(sizeof(uint64_t) + sizeof(struct xlog_op_header));

be a little easier to read?
Dave Chinner Feb. 25, 2021, 10:16 p.m. UTC | #2
On Thu, Feb 25, 2021 at 07:27:20PM +0100, Christoph Hellwig wrote:
> > +			if (optype && index) {
> > +				optype &= ~XLOG_START_TRANS;
> > +			} else if (partial_copy) {
> >                                  ophdr = xlog_write_setup_ophdr(ptr, ticket);
> 
> This line uses whitespaces for indentation, we should probably fix that
> up somewhere in the series.

It goes away entirely so, yes, it is fixed up :)

> >  static inline void *
> >  xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
> >  		uint type)
> >  {
> > -	struct xfs_log_iovec *vec = *vecp;
> > +	struct xfs_log_iovec	*vec = *vecp;
> > +	struct xlog_op_header	*oph;
> > +	uint32_t		len;
> > +	void			*buf;
> >  
> >  	if (vec) {
> >  		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
> > @@ -44,21 +54,36 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
> >  		vec = &lv->lv_iovecp[0];
> >  	}
> >  
> > -	if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t)))
> > -		lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t));
> > +	len = lv->lv_buf_len + sizeof(struct xlog_op_header);
> > +	if (!IS_ALIGNED(len, sizeof(uint64_t))) {
> > +		lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
> > +					sizeof(struct xlog_op_header);
> > +	}
> >  
> >  	vec->i_type = type;
> >  	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
> >  
> > -	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
> > +	oph = vec->i_addr;
> > +	oph->oh_clientid = XFS_TRANSACTION;
> > +	oph->oh_res2 = 0;
> > +	oph->oh_flags = 0;
> > +
> > +	buf = vec->i_addr + sizeof(struct xlog_op_header);
> > +	ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
> >  
> >  	*vecp = vec;
> > -	return vec->i_addr;
> > +	return buf;
> >  }
> 
> I think this function is growing a little too larger to stay inlined.

Possibly. Let me have a look at code size and if it does make a
difference I'll move it out of line in another patch.

> 
> > -		nbytes += niovecs * sizeof(uint64_t);
> > +		nbytes += niovecs * (sizeof(uint64_t) +
> > +					sizeof(struct xlog_op_header));;
> 
> Is it just me, or would
> 
> 		nbytes += niovecs *
> 			(sizeof(uint64_t) + sizeof(struct xlog_op_header));
> 
> be a little easier to read?

Yes, that's better.

Cheers,

Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index dd86c141d9c9..f7e16cb3fe7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2205,9 +2205,9 @@  xlog_print_trans(
 }
 
 /*
- * Calculate the potential space needed by the log vector. If this is a start
- * transaction, the caller has already accounted for both opheaders in the start
- * transaction, so we don't need to account for them here.
+ * Calculate the potential space needed by the log vector. All regions contain
+ * their own opheaders and they are accounted for in region space so we don't
+ * need to add them to the vector length here.
  */
 static int
 xlog_write_calc_vec_length(
@@ -2234,18 +2234,7 @@  xlog_write_calc_vec_length(
 			xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
 		}
 	}
-
-	/* Don't account for regions with embedded ophdrs */
-	if (optype && headers > 0) {
-		headers--;
-		if (optype & XLOG_START_TRANS) {
-			ASSERT(headers >= 1);
-			headers--;
-		}
-	}
-
 	ticket->t_res_num_ophdrs += headers;
-	len += headers * sizeof(struct xlog_op_header);
 
 	return len;
 }
@@ -2255,7 +2244,6 @@  xlog_write_setup_ophdr(
 	struct xlog_op_header	*ophdr,
 	struct xlog_ticket	*ticket)
 {
-	ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
 	ophdr->oh_clientid = XFS_TRANSACTION;
 	ophdr->oh_res2 = 0;
 	ophdr->oh_flags = 0;
@@ -2489,21 +2477,25 @@  xlog_write(
 			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
 
 			/*
-			 * The XLOG_START_TRANS has embedded ophdrs for the
-			 * start record and transaction header. They will always
-			 * be the first two regions in the lv chain. Commit and
-			 * unmount records also have embedded ophdrs.
+			 * Regions always have their ophdr at the start of the
+			 * region, except for:
+			 * - a transaction start which has a start record ophdr
+			 *   before the first region ophdr; and
+			 * - the previous region didn't fully fit into an iclog
+			 *   so needs a continuation ophdr to prepend the region
+			 *   in this new iclog.
 			 */
-			if (optype) {
-				ophdr = reg->i_addr;
-				if (index)
-					optype &= ~XLOG_START_TRANS;
-			} else {
+			ophdr = reg->i_addr;
+			if (optype && index) {
+				optype &= ~XLOG_START_TRANS;
+			} else if (partial_copy) {
                                 ophdr = xlog_write_setup_ophdr(ptr, ticket);
 				xlog_write_adv_cnt(&ptr, &len, &log_offset,
 					   sizeof(struct xlog_op_header));
 				added_ophdr = true;
 			}
+			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+
 			len += xlog_write_setup_copy(ticket, ophdr,
 						     iclog->ic_size-log_offset,
 						     reg->i_len,
@@ -2521,20 +2513,11 @@  xlog_write(
 				ophdr->oh_len = cpu_to_be32(copy_len -
 						sizeof(struct xlog_op_header));
 			}
-			/*
-			 * Copy region.
-			 *
-			 * Commit records just log an opheader, so
-			 * we can have empty payloads with no data region to
-			 * copy.  Hence we only copy the payload if the vector
-			 * says it has data to copy.
-			 */
-			ASSERT(copy_len >= 0);
-			if (copy_len > 0) {
-				memcpy(ptr, reg->i_addr + copy_off, copy_len);
-				xlog_write_adv_cnt(&ptr, &len, &log_offset,
-						   copy_len);
-			}
+
+			ASSERT(copy_len > 0);
+			memcpy(ptr, reg->i_addr + copy_off, copy_len);
+			xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
+
 			if (added_ophdr)
 				copy_len += sizeof(struct xlog_op_header);
 			record_cnt++;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bc93edb9929..335a139eb018 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -30,12 +30,22 @@  struct xfs_log_vec {
  * track the space used by the log vectors separately to prevent log space hangs
  * due to inaccurate accounting (i.e. a leak) of the used log space through the
  * CIL context ticket.
+ *
+ * We also add space for the xlog_op_header that describes this region in the
+ * log. This prepends the data region we return to the caller to copy their data
+ * into, so do all the static initialisation of the ophdr now. Because the ophdr
+ * is not 8 byte aligned, we have to be careful to ensure that we align the
+ * start of the buffer such that the region we return to the caller is 8 byte
+ * aligned and packed against the tail of the ophdr.
  */
 static inline void *
 xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 		uint type)
 {
-	struct xfs_log_iovec *vec = *vecp;
+	struct xfs_log_iovec	*vec = *vecp;
+	struct xlog_op_header	*oph;
+	uint32_t		len;
+	void			*buf;
 
 	if (vec) {
 		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
@@ -44,21 +54,36 @@  xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 		vec = &lv->lv_iovecp[0];
 	}
 
-	if (!IS_ALIGNED(lv->lv_buf_len, sizeof(uint64_t)))
-		lv->lv_buf_len = round_up(lv->lv_buf_len, sizeof(uint64_t));
+	len = lv->lv_buf_len + sizeof(struct xlog_op_header);
+	if (!IS_ALIGNED(len, sizeof(uint64_t))) {
+		lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
+					sizeof(struct xlog_op_header);
+	}
 
 	vec->i_type = type;
 	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
 
-	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
+	oph = vec->i_addr;
+	oph->oh_clientid = XFS_TRANSACTION;
+	oph->oh_res2 = 0;
+	oph->oh_flags = 0;
+
+	buf = vec->i_addr + sizeof(struct xlog_op_header);
+	ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t)));
 
 	*vecp = vec;
-	return vec->i_addr;
+	return buf;
 }
 
 static inline void
 xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 {
+	struct xlog_op_header	*oph = vec->i_addr;
+
+	/* opheader tracks payload length, logvec tracks region length */
+	oph->oh_len = len;
+
+	len += sizeof(struct xlog_op_header);
 	lv->lv_buf_len += len;
 	lv->lv_bytes += len;
 	vec->i_len = len;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b5ad62f12e24..98a8ac0b4a87 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -181,13 +181,20 @@  xlog_cil_alloc_shadow_bufs(
 		}
 
 		/*
-		 * We 64-bit align the length of each iovec so that the start
-		 * of the next one is naturally aligned.  We'll need to
-		 * account for that slack space here. Then round nbytes up
-		 * to 64-bit alignment so that the initial buffer alignment is
-		 * easy to calculate and verify.
+		 * We 64-bit align the length of each iovec so that the start of
+		 * the next one is naturally aligned.  We'll need to account for
+		 * that slack space here.
+		 *
+		 * We also add the xlog_op_header to each region when
+		 * formatting, but that's not accounted to the size of the item
+		 * at this point. Hence we'll need an additional number of
+		 * bytes for each vector to hold an opheader.
+		 *
+		 * Then round nbytes up to 64-bit alignment so that the initial
+		 * buffer alignment is easy to calculate and verify.
 		 */
-		nbytes += niovecs * sizeof(uint64_t);
+		nbytes += niovecs * (sizeof(uint64_t) +
+					sizeof(struct xlog_op_header));;
 		nbytes = round_up(nbytes, sizeof(uint64_t));
 
 		/*
@@ -433,11 +440,6 @@  xlog_cil_insert_items(
 
 	spin_lock(&cil->xc_cil_lock);
 
-	/* account for space used by new iovec headers  */
-	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
-	len += iovhdr_res;
-	ctx->nvecs += diff_iovecs;
-
 	/* attach the transaction to the CIL if it has any busy extents */
 	if (!list_empty(&tp->t_busy))
 		list_splice_init(&tp->t_busy, &ctx->busy_extents);
@@ -469,6 +471,7 @@  xlog_cil_insert_items(
 	}
 	tp->t_ticket->t_curr_res -= len;
 	ctx->space_used += len;
+	ctx->nvecs += diff_iovecs;
 
 	/*
 	 * If we've overrun the reservation, dump the tx details before we move