diff mbox series

[28/45] xfs: introduce xlog_write_single()

Message ID 20210305051143.182133-29-david@fromorbit.com (mailing list archive)
State Superseded
Headers show
Series xfs: consolidated log and optimisation changes | expand

Commit Message

Dave Chinner March 5, 2021, 5:11 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Introduce an optimised version of xlog_write() that is used when the
entire write will fit in a single iclog. This greatly simplifies the
implementation of writing a log vector chain into an iclog, and sets
the ground work for a much more understandable xlog_write()
implementation.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong March 9, 2021, 2:39 a.m. UTC | #1
On Fri, Mar 05, 2021 at 04:11:26PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Introduce an optimised version of xlog_write() that is used when the
> entire write will fit in a single iclog. This greatly simplifies the
> implementation of writing a log vector chain into an iclog, and sets
> the ground work for a much more understandable xlog_write()
> implementation.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 22f97914ab99..590c1e6db475 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2214,6 +2214,52 @@ xlog_write_copy_finish(
>  	return error;
>  }
>  
> +/*
> + * Write log vectors into a single iclog which is guaranteed by the caller
> + * to have enough space to write the entire log vector into. Return the number
> + * of log vectors written into the iclog.
> + */
> +static int
> +xlog_write_single(
> +	struct xfs_log_vec	*log_vector,
> +	struct xlog_ticket	*ticket,
> +	struct xlog_in_core	*iclog,
> +	uint32_t		log_offset,
> +	uint32_t		len)
> +{
> +	struct xfs_log_vec	*lv = log_vector;
> +	void			*ptr;
> +	int			index = 0;
> +	int			record_cnt = 0;

Any reason these (and the return type) can't be unsigned?  I don't think
negative indices or record counts have any meaning, right?

Otherwise this looks ok to me.

--D

> +
> +	ASSERT(log_offset + len <= iclog->ic_size);
> +
> +	ptr = iclog->ic_datap + log_offset;
> +	for (lv = log_vector; lv; lv = lv->lv_next) {
> +		/*
> +		 * Ordered log vectors have no regions to write so this
> +		 * loop will naturally skip them.
> +		 */
> +		for (index = 0; index < lv->lv_niovecs; index++) {
> +			struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
> +			struct xlog_op_header	*ophdr = reg->i_addr;
> +
> +			ASSERT(reg->i_len % sizeof(int32_t) == 0);
> +			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
> +
> +			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
> +			ophdr->oh_len = cpu_to_be32(reg->i_len -
> +						sizeof(struct xlog_op_header));
> +			memcpy(ptr, reg->i_addr, reg->i_len);
> +			xlog_write_adv_cnt(&ptr, &len, &log_offset, reg->i_len);
> +			record_cnt++;
> +		}
> +	}
> +	ASSERT(len == 0);
> +	return record_cnt;
> +}
> +
> +
>  /*
>   * Write some region out to in-core log
>   *
> @@ -2294,7 +2340,6 @@ xlog_write(
>  			return error;
>  
>  		ASSERT(log_offset <= iclog->ic_size - 1);
> -		ptr = iclog->ic_datap + log_offset;
>  
>  		/* Start_lsn is the first lsn written to. */
>  		if (start_lsn && !*start_lsn)
> @@ -2311,10 +2356,20 @@ xlog_write(
>  						XLOG_ICL_NEED_FUA);
>  		}
>  
> +		/* If this is a single iclog write, go fast... */
> +		if (!contwr && lv == log_vector) {
> +			record_cnt = xlog_write_single(lv, ticket, iclog,
> +						log_offset, len);
> +			len = 0;
> +			data_cnt = len;
> +			break;
> +		}
> +
>  		/*
>  		 * This loop writes out as many regions as can fit in the amount
>  		 * of space which was allocated by xlog_state_get_iclog_space().
>  		 */
> +		ptr = iclog->ic_datap + log_offset;
>  		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
>  			struct xfs_log_iovec	*reg;
>  			struct xlog_op_header	*ophdr;
> -- 
> 2.28.0
>
Dave Chinner March 11, 2021, 4:19 a.m. UTC | #2
On Mon, Mar 08, 2021 at 06:39:27PM -0800, Darrick J. Wong wrote:
> On Fri, Mar 05, 2021 at 04:11:26PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Introduce an optimised version of xlog_write() that is used when the
> > entire write will fit in a single iclog. This greatly simplifies the
> > implementation of writing a log vector chain into an iclog, and sets
> > the ground work for a much more understandable xlog_write()
> > implementation.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_log.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 56 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index 22f97914ab99..590c1e6db475 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -2214,6 +2214,52 @@ xlog_write_copy_finish(
> >  	return error;
> >  }
> >  
> > +/*
> > + * Write log vectors into a single iclog which is guaranteed by the caller
> > + * to have enough space to write the entire log vector into. Return the number
> > + * of log vectors written into the iclog.
> > + */
> > +static int
> > +xlog_write_single(
> > +	struct xfs_log_vec	*log_vector,
> > +	struct xlog_ticket	*ticket,
> > +	struct xlog_in_core	*iclog,
> > +	uint32_t		log_offset,
> > +	uint32_t		len)
> > +{
> > +	struct xfs_log_vec	*lv = log_vector;
> > +	void			*ptr;
> > +	int			index = 0;
> > +	int			record_cnt = 0;
> 
> Any reason these (and the return type) can't be unsigned?  I don't think
> negative indices or record counts have any meaning, right?

Correct, but I'm going to ignore that because the next patch already
addresses both of these things. Changing it here just means a
massive reject of the next patch where it replaces the return value
with a log vector, and the record count moves to a function
parameter that is, indeed, a uint32_t.

Cheers,

Dave.
Brian Foster March 16, 2021, 6:39 p.m. UTC | #3
On Fri, Mar 05, 2021 at 04:11:26PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Introduce an optimised version of xlog_write() that is used when the
> entire write will fit in a single iclog. This greatly simplifies the
> implementation of writing a log vector chain into an iclog, and sets
> the ground work for a much more understandable xlog_write()
> implementation.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 22f97914ab99..590c1e6db475 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2214,6 +2214,52 @@ xlog_write_copy_finish(
>  	return error;
>  }
>  
> +/*
> + * Write log vectors into a single iclog which is guaranteed by the caller
> + * to have enough space to write the entire log vector into. Return the number
> + * of log vectors written into the iclog.
> + */
> +static int
> +xlog_write_single(
> +	struct xfs_log_vec	*log_vector,
> +	struct xlog_ticket	*ticket,
> +	struct xlog_in_core	*iclog,
> +	uint32_t		log_offset,
> +	uint32_t		len)
> +{
> +	struct xfs_log_vec	*lv = log_vector;

This is initialized here and in the loop below.

> +	void			*ptr;
> +	int			index = 0;
> +	int			record_cnt = 0;
> +
> +	ASSERT(log_offset + len <= iclog->ic_size);
> +
> +	ptr = iclog->ic_datap + log_offset;
> +	for (lv = log_vector; lv; lv = lv->lv_next) {
> +		/*
> +		 * Ordered log vectors have no regions to write so this
> +		 * loop will naturally skip them.
> +		 */
> +		for (index = 0; index < lv->lv_niovecs; index++) {
> +			struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
> +			struct xlog_op_header	*ophdr = reg->i_addr;
> +
> +			ASSERT(reg->i_len % sizeof(int32_t) == 0);
> +			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
> +
> +			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
> +			ophdr->oh_len = cpu_to_be32(reg->i_len -
> +						sizeof(struct xlog_op_header));

Perhaps we should retain the xlog_verify_dest_ptr() call here? It's
DEBUG code and otherwise compiled out, so shouldn't impact production

> +			memcpy(ptr, reg->i_addr, reg->i_len);
> +			xlog_write_adv_cnt(&ptr, &len, &log_offset, reg->i_len);
> +			record_cnt++;
> +		}
> +	}
> +	ASSERT(len == 0);
> +	return record_cnt;
> +}
> +
> +
>  /*
>   * Write some region out to in-core log
>   *
> @@ -2294,7 +2340,6 @@ xlog_write(
>  			return error;
>  
>  		ASSERT(log_offset <= iclog->ic_size - 1);
> -		ptr = iclog->ic_datap + log_offset;
>  
>  		/* Start_lsn is the first lsn written to. */
>  		if (start_lsn && !*start_lsn)
> @@ -2311,10 +2356,20 @@ xlog_write(
>  						XLOG_ICL_NEED_FUA);
>  		}
>  
> +		/* If this is a single iclog write, go fast... */
> +		if (!contwr && lv == log_vector) {
> +			record_cnt = xlog_write_single(lv, ticket, iclog,
> +						log_offset, len);
> +			len = 0;

I assume this is here to satisfy the assert further down in the
function.. This seems a bit contrived when you consider we pass len to
the helper, the helper reduces it and asserts that it goes to zero, then
we do so again here just for another assert. Unless this is all just
removed later, it might be more straightforward to pass a reference.

> +			data_cnt = len;

Similarly, this looks a bit odd because it seems data_cnt should be zero
in the case where contwr == 0. xlog_state_get_iclog_space() has already
bumped ->ic_offset by len (so xlog_state_finish_copy() doesn't need to
via data_cnt).

Brian

> +			break;
> +		}
> +
>  		/*
>  		 * This loop writes out as many regions as can fit in the amount
>  		 * of space which was allocated by xlog_state_get_iclog_space().
>  		 */
> +		ptr = iclog->ic_datap + log_offset;
>  		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
>  			struct xfs_log_iovec	*reg;
>  			struct xlog_op_header	*ophdr;
> -- 
> 2.28.0
>
Dave Chinner May 19, 2021, 3:44 a.m. UTC | #4
On Tue, Mar 16, 2021 at 02:39:59PM -0400, Brian Foster wrote:
> On Fri, Mar 05, 2021 at 04:11:26PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Introduce an optimised version of xlog_write() that is used when the
> > entire write will fit in a single iclog. This greatly simplifies the
> > implementation of writing a log vector chain into an iclog, and sets
> > the ground work for a much more understandable xlog_write()
> > implementation.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_log.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 56 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index 22f97914ab99..590c1e6db475 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -2214,6 +2214,52 @@ xlog_write_copy_finish(
> >  	return error;
> >  }
> >  
> > +/*
> > + * Write log vectors into a single iclog which is guaranteed by the caller
> > + * to have enough space to write the entire log vector into. Return the number
> > + * of log vectors written into the iclog.
> > + */
> > +static int
> > +xlog_write_single(
> > +	struct xfs_log_vec	*log_vector,
> > +	struct xlog_ticket	*ticket,
> > +	struct xlog_in_core	*iclog,
> > +	uint32_t		log_offset,
> > +	uint32_t		len)
> > +{
> > +	struct xfs_log_vec	*lv = log_vector;
> 
> This is initialized here and in the loop below.

Fixed.

> 
> > +	void			*ptr;
> > +	int			index = 0;
> > +	int			record_cnt = 0;
> > +
> > +	ASSERT(log_offset + len <= iclog->ic_size);
> > +
> > +	ptr = iclog->ic_datap + log_offset;
> > +	for (lv = log_vector; lv; lv = lv->lv_next) {
> > +		/*
> > +		 * Ordered log vectors have no regions to write so this
> > +		 * loop will naturally skip them.
> > +		 */
> > +		for (index = 0; index < lv->lv_niovecs; index++) {
> > +			struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
> > +			struct xlog_op_header	*ophdr = reg->i_addr;
> > +
> > +			ASSERT(reg->i_len % sizeof(int32_t) == 0);
> > +			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
> > +
> > +			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
> > +			ophdr->oh_len = cpu_to_be32(reg->i_len -
> > +						sizeof(struct xlog_op_header));
> 
> Perhaps we should retain the xlog_verify_dest_ptr() call here? It's
> DEBUG code and otherwise compiled out, so shouldn't impact production

The pointer check does nothing to actually prevent memory
corruption. It only catches problems after we've already memcpy()d
off the end of the iclog in the previous loop. So if the last region
overruns the log, then it won't be triggered.

And, well, we've already checked and asserted that the copy is going
to fit entirely within the current iclog, so checking whether the
pointer has overrun outside the iclog buffer is both redundant and
too late.  Hence I removed it...

> > +			memcpy(ptr, reg->i_addr, reg->i_len);
> > +			xlog_write_adv_cnt(&ptr, &len, &log_offset, reg->i_len);
> > +			record_cnt++;
> > +		}
> > +	}
> > +	ASSERT(len == 0);
> > +	return record_cnt;
> > +}
> > +
> > +
> >  /*
> >   * Write some region out to in-core log
> >   *
> > @@ -2294,7 +2340,6 @@ xlog_write(
> >  			return error;
> >  
> >  		ASSERT(log_offset <= iclog->ic_size - 1);
> > -		ptr = iclog->ic_datap + log_offset;
> >  
> >  		/* Start_lsn is the first lsn written to. */
> >  		if (start_lsn && !*start_lsn)
> > @@ -2311,10 +2356,20 @@ xlog_write(
> >  						XLOG_ICL_NEED_FUA);
> >  		}
> >  
> > +		/* If this is a single iclog write, go fast... */
> > +		if (!contwr && lv == log_vector) {
> > +			record_cnt = xlog_write_single(lv, ticket, iclog,
> > +						log_offset, len);
> > +			len = 0;
> 
> I assume this is here to satisfy the assert further down in the
> function.. This seems a bit contrived when you consider we pass len to
> the helper, the helper reduces it and asserts that it goes to zero, then
> we do so again here just for another assert. Unless this is all just
> removed later, it might be more straightforward to pass a reference.
> 
> > +			data_cnt = len;
> 
> Similarly, this looks a bit odd because it seems data_cnt should be zero
> in the case where contwr == 0. xlog_state_get_iclog_space() has already
> bumped ->ic_offset by len (so xlog_state_finish_copy() doesn't need to
> via data_cnt).

Yes, it's entirely contrived to make it possible to split this code
out in a simple fashion to ease review of the simple, fast path case
this code will end up with. The next patch changes all this context
and the parameters passed to the function, but this was the only way
I could easily split the complex xlog_write() rewrite change into
something a little bit simpler....

Cheers,

Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 22f97914ab99..590c1e6db475 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2214,6 +2214,52 @@  xlog_write_copy_finish(
 	return error;
 }
 
+/*
+ * Write log vectors into a single iclog which is guaranteed by the caller
+ * to have enough space to write the entire log vector into. Return the number
+ * of log vectors written into the iclog.
+ */
+static int
+xlog_write_single(
+	struct xfs_log_vec	*log_vector,
+	struct xlog_ticket	*ticket,
+	struct xlog_in_core	*iclog,
+	uint32_t		log_offset,
+	uint32_t		len)
+{
+	struct xfs_log_vec	*lv = log_vector;
+	void			*ptr;
+	int			index = 0;
+	int			record_cnt = 0;
+
+	ASSERT(log_offset + len <= iclog->ic_size);
+
+	ptr = iclog->ic_datap + log_offset;
+	for (lv = log_vector; lv; lv = lv->lv_next) {
+		/*
+		 * Ordered log vectors have no regions to write so this
+		 * loop will naturally skip them.
+		 */
+		for (index = 0; index < lv->lv_niovecs; index++) {
+			struct xfs_log_iovec	*reg = &lv->lv_iovecp[index];
+			struct xlog_op_header	*ophdr = reg->i_addr;
+
+			ASSERT(reg->i_len % sizeof(int32_t) == 0);
+			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
+
+			ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+			ophdr->oh_len = cpu_to_be32(reg->i_len -
+						sizeof(struct xlog_op_header));
+			memcpy(ptr, reg->i_addr, reg->i_len);
+			xlog_write_adv_cnt(&ptr, &len, &log_offset, reg->i_len);
+			record_cnt++;
+		}
+	}
+	ASSERT(len == 0);
+	return record_cnt;
+}
+
+
 /*
  * Write some region out to in-core log
  *
@@ -2294,7 +2340,6 @@  xlog_write(
 			return error;
 
 		ASSERT(log_offset <= iclog->ic_size - 1);
-		ptr = iclog->ic_datap + log_offset;
 
 		/* Start_lsn is the first lsn written to. */
 		if (start_lsn && !*start_lsn)
@@ -2311,10 +2356,20 @@  xlog_write(
 						XLOG_ICL_NEED_FUA);
 		}
 
+		/* If this is a single iclog write, go fast... */
+		if (!contwr && lv == log_vector) {
+			record_cnt = xlog_write_single(lv, ticket, iclog,
+						log_offset, len);
+			len = 0;
+			data_cnt = len;
+			break;
+		}
+
 		/*
 		 * This loop writes out as many regions as can fit in the amount
 		 * of space which was allocated by xlog_state_get_iclog_space().
 		 */
+		ptr = iclog->ic_datap + log_offset;
 		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 			struct xfs_log_iovec	*reg;
 			struct xlog_op_header	*ophdr;