diff mbox series

[1/8] xfs: don't try to write a start record into every iclog

Message ID 20200324174459.770999-2-hch@lst.de (mailing list archive)
State Superseded
Headers show
Series [1/8] xfs: don't try to write a start record into every iclog | expand

Commit Message

Christoph Hellwig March 24, 2020, 5:44 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

The xlog_write() function iterates over iclogs until it completes
writing all the log vectors passed in. The ticket tracks whether
a start record has been written or not, so only the first iclog gets
a start record. We only ever pass single use tickets to
xlog_write() so we only ever need to write a start record once per
xlog_write() call.

Hence we don't need to store whether we should write a start record
in the ticket as the callers provide all the information we need to
determine if a start record should be written. For the moment, we
have to ensure that we clear the XLOG_TIC_INITED appropriately so
the code in xfs_log_done() still works correctly for committing
transactions.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[hch: use an need_start_rec bool]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

Comments

Darrick J. Wong March 24, 2020, 8:39 p.m. UTC | #1
On Tue, Mar 24, 2020 at 06:44:52PM +0100, Christoph Hellwig wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The xlog_write() function iterates over iclogs until it completes
> writing all the log vectors passed in. The ticket tracks whether
> a start record has been written or not, so only the first iclog gets
> a start record. We only ever pass single use tickets to
> xlog_write() so we only ever need to write a start record once per
> xlog_write() call.
> 
> Hence we don't need to store whether we should write a start record
> in the ticket as the callers provide all the information we need to
> determine if a start record should be written. For the moment, we
> have to ensure that we clear the XLOG_TIC_INITED appropriately so
> the code in xfs_log_done() still works correctly for committing
> transactions.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> [hch: use an need_start_rec bool]
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
>  1 file changed, 32 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 2a90a483c2d6..bf071552094a 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2112,23 +2112,21 @@ xlog_print_trans(
>  }
>  
>  /*
> - * Calculate the potential space needed by the log vector.  Each region gets
> - * its own xlog_op_header_t and may need to be double word aligned.
> + * Calculate the potential space needed by the log vector.  We may need a start
> + * record, and each region gets its own struct xlog_op_header and may need to be
> + * double word aligned.
>   */
>  static int
>  xlog_write_calc_vec_length(
>  	struct xlog_ticket	*ticket,
> -	struct xfs_log_vec	*log_vector)
> +	struct xfs_log_vec	*log_vector,
> +	bool			need_start_rec)
>  {
>  	struct xfs_log_vec	*lv;
> -	int			headers = 0;
> +	int			headers = need_start_rec ? 1 : 0;
>  	int			len = 0;
>  	int			i;
>  
> -	/* acct for start rec of xact */
> -	if (ticket->t_flags & XLOG_TIC_INITED)
> -		headers++;
> -
>  	for (lv = log_vector; lv; lv = lv->lv_next) {
>  		/* we don't write ordered log vectors */
>  		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
> @@ -2150,27 +2148,16 @@ xlog_write_calc_vec_length(
>  	return len;
>  }
>  
> -/*
> - * If first write for transaction, insert start record  We can't be trying to
> - * commit if we are inited.  We can't have any "partial_copy" if we are inited.
> - */
> -static int
> +static void
>  xlog_write_start_rec(
>  	struct xlog_op_header	*ophdr,
>  	struct xlog_ticket	*ticket)
>  {
> -	if (!(ticket->t_flags & XLOG_TIC_INITED))
> -		return 0;
> -
>  	ophdr->oh_tid	= cpu_to_be32(ticket->t_tid);
>  	ophdr->oh_clientid = ticket->t_clientid;
>  	ophdr->oh_len = 0;
>  	ophdr->oh_flags = XLOG_START_TRANS;
>  	ophdr->oh_res2 = 0;
> -
> -	ticket->t_flags &= ~XLOG_TIC_INITED;
> -
> -	return sizeof(struct xlog_op_header);
>  }
>  
>  static xlog_op_header_t *
> @@ -2372,25 +2359,29 @@ xlog_write(
>  	int			record_cnt = 0;
>  	int			data_cnt = 0;
>  	int			error = 0;
> +	bool			need_start_rec = true;
>  
>  	*start_lsn = 0;
>  
> -	len = xlog_write_calc_vec_length(ticket, log_vector);
>  
>  	/*
>  	 * Region headers and bytes are already accounted for.
>  	 * We only need to take into account start records and
>  	 * split regions in this function.
>  	 */
> -	if (ticket->t_flags & XLOG_TIC_INITED)
> -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> +	if (ticket->t_flags & XLOG_TIC_INITED) {
> +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> +		ticket->t_flags &= ~XLOG_TIC_INITED;
> +	}
>  
>  	/*
>  	 * Commit record headers need to be accounted for. These
>  	 * come in as separate writes so are easy to detect.
>  	 */
> -	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
> -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
> +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> +		need_start_rec = false;
> +	}
>  
>  	if (ticket->t_curr_res < 0) {
>  		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
> @@ -2399,6 +2390,8 @@ xlog_write(
>  		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
>  	}
>  
> +	len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
> +
>  	index = 0;
>  	lv = log_vector;
>  	vecp = lv->lv_iovecp;
> @@ -2425,7 +2418,6 @@ xlog_write(
>  		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
>  			struct xfs_log_iovec	*reg;
>  			struct xlog_op_header	*ophdr;
> -			int			start_rec_copy;
>  			int			copy_len;
>  			int			copy_off;
>  			bool			ordered = false;
> @@ -2441,11 +2433,15 @@ xlog_write(
>  			ASSERT(reg->i_len % sizeof(int32_t) == 0);
>  			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
>  
> -			start_rec_copy = xlog_write_start_rec(ptr, ticket);
> -			if (start_rec_copy) {
> -				record_cnt++;
> +			/*
> +			 * Before we start formatting log vectors, we need to
> +			 * write a start record. Only do this for the first
> +			 * iclog we write to.
> +			 */
> +			if (need_start_rec) {
> +				xlog_write_start_rec(ptr, ticket);
>  				xlog_write_adv_cnt(&ptr, &len, &log_offset,
> -						   start_rec_copy);
> +						sizeof(struct xlog_op_header));
>  			}
>  
>  			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
> @@ -2477,8 +2473,13 @@ xlog_write(
>  				xlog_write_adv_cnt(&ptr, &len, &log_offset,
>  						   copy_len);
>  			}
> -			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
> +			copy_len += sizeof(struct xlog_op_header);
>  			record_cnt++;
> +			if (need_start_rec) {
> +				copy_len += sizeof(struct xlog_op_header);
> +				record_cnt++;
> +				need_start_rec = false;
> +			}
>  			data_cnt += contwr ? copy_len : 0;
>  
>  			error = xlog_write_copy_finish(log, iclog, flags,
> -- 
> 2.25.1
>
Brian Foster March 25, 2020, 12:33 p.m. UTC | #2
On Tue, Mar 24, 2020 at 06:44:52PM +0100, Christoph Hellwig wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The xlog_write() function iterates over iclogs until it completes
> writing all the log vectors passed in. The ticket tracks whether
> a start record has been written or not, so only the first iclog gets
> a start record. We only ever pass single use tickets to
> xlog_write() so we only ever need to write a start record once per
> xlog_write() call.
> 
> Hence we don't need to store whether we should write a start record
> in the ticket as the callers provide all the information we need to
> determine if a start record should be written. For the moment, we
> have to ensure that we clear the XLOG_TIC_INITED appropriately so
> the code in xfs_log_done() still works correctly for committing
> transactions.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> [hch: use an need_start_rec bool]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
>  1 file changed, 32 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 2a90a483c2d6..bf071552094a 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
...
> @@ -2372,25 +2359,29 @@ xlog_write(
>  	int			record_cnt = 0;
>  	int			data_cnt = 0;
>  	int			error = 0;
> +	bool			need_start_rec = true;
>  
>  	*start_lsn = 0;
>  
> -	len = xlog_write_calc_vec_length(ticket, log_vector);
>  
>  	/*
>  	 * Region headers and bytes are already accounted for.
>  	 * We only need to take into account start records and
>  	 * split regions in this function.
>  	 */
> -	if (ticket->t_flags & XLOG_TIC_INITED)
> -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> +	if (ticket->t_flags & XLOG_TIC_INITED) {
> +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> +		ticket->t_flags &= ~XLOG_TIC_INITED;
> +	}
>  
>  	/*
>  	 * Commit record headers need to be accounted for. These
>  	 * come in as separate writes so are easy to detect.
>  	 */
> -	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
> -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
> +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> +		need_start_rec = false;
> +	}

Hmm.. I was asking for a comment update in v1 for this logic change.
Looking through it again, what happens here if
xfs_log_write_unmount_record() clears the UNMOUNT_TRANS flag for that
summary counter check? That looks like a potential behavior change wrt
to the start record..

Brian

>  
>  	if (ticket->t_curr_res < 0) {
>  		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
> @@ -2399,6 +2390,8 @@ xlog_write(
>  		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
>  	}
>  
> +	len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
> +
>  	index = 0;
>  	lv = log_vector;
>  	vecp = lv->lv_iovecp;
> @@ -2425,7 +2418,6 @@ xlog_write(
>  		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
>  			struct xfs_log_iovec	*reg;
>  			struct xlog_op_header	*ophdr;
> -			int			start_rec_copy;
>  			int			copy_len;
>  			int			copy_off;
>  			bool			ordered = false;
> @@ -2441,11 +2433,15 @@ xlog_write(
>  			ASSERT(reg->i_len % sizeof(int32_t) == 0);
>  			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
>  
> -			start_rec_copy = xlog_write_start_rec(ptr, ticket);
> -			if (start_rec_copy) {
> -				record_cnt++;
> +			/*
> +			 * Before we start formatting log vectors, we need to
> +			 * write a start record. Only do this for the first
> +			 * iclog we write to.
> +			 */
> +			if (need_start_rec) {
> +				xlog_write_start_rec(ptr, ticket);
>  				xlog_write_adv_cnt(&ptr, &len, &log_offset,
> -						   start_rec_copy);
> +						sizeof(struct xlog_op_header));
>  			}
>  
>  			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
> @@ -2477,8 +2473,13 @@ xlog_write(
>  				xlog_write_adv_cnt(&ptr, &len, &log_offset,
>  						   copy_len);
>  			}
> -			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
> +			copy_len += sizeof(struct xlog_op_header);
>  			record_cnt++;
> +			if (need_start_rec) {
> +				copy_len += sizeof(struct xlog_op_header);
> +				record_cnt++;
> +				need_start_rec = false;
> +			}
>  			data_cnt += contwr ? copy_len : 0;
>  
>  			error = xlog_write_copy_finish(log, iclog, flags,
> -- 
> 2.25.1
>
Dave Chinner March 25, 2020, 11:25 p.m. UTC | #3
On Wed, Mar 25, 2020 at 08:33:14AM -0400, Brian Foster wrote:
> On Tue, Mar 24, 2020 at 06:44:52PM +0100, Christoph Hellwig wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > The xlog_write() function iterates over iclogs until it completes
> > writing all the log vectors passed in. The ticket tracks whether
> > a start record has been written or not, so only the first iclog gets
> > a start record. We only ever pass single use tickets to
> > xlog_write() so we only ever need to write a start record once per
> > xlog_write() call.
> > 
> > Hence we don't need to store whether we should write a start record
> > in the ticket as the callers provide all the information we need to
> > determine if a start record should be written. For the moment, we
> > have to ensure that we clear the XLOG_TIC_INITED appropriately so
> > the code in xfs_log_done() still works correctly for committing
> > transactions.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > [hch: use an need_start_rec bool]
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
> >  1 file changed, 32 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index 2a90a483c2d6..bf071552094a 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> ...
> > @@ -2372,25 +2359,29 @@ xlog_write(
> >  	int			record_cnt = 0;
> >  	int			data_cnt = 0;
> >  	int			error = 0;
> > +	bool			need_start_rec = true;
> >  
> >  	*start_lsn = 0;
> >  
> > -	len = xlog_write_calc_vec_length(ticket, log_vector);
> >  
> >  	/*
> >  	 * Region headers and bytes are already accounted for.
> >  	 * We only need to take into account start records and
> >  	 * split regions in this function.
> >  	 */
> > -	if (ticket->t_flags & XLOG_TIC_INITED)
> > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > +	if (ticket->t_flags & XLOG_TIC_INITED) {
> > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > +		ticket->t_flags &= ~XLOG_TIC_INITED;
> > +	}
> >  
> >  	/*
> >  	 * Commit record headers need to be accounted for. These
> >  	 * come in as separate writes so are easy to detect.
> >  	 */
> > -	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
> > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
> > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > +		need_start_rec = false;
> > +	}
> 
> Hmm.. I was asking for a comment update in v1 for this logic change.
> Looking through it again, what happens here if
> xfs_log_write_unmount_record() clears the UNMOUNT_TRANS flag for that
> summary counter check? That looks like a potential behavior change wrt
> to the start record..

xfs_log_write_unmount_record() clears the XLOG_TIC_INITED
flag before calling xlog_write(), so the current code never writes
out a start record for the unmount record. i.e. the unmount
record is a single region with the unmount log item in it, and
AFAICT this code does not change the behaviour of the unmount record
write at all.

FWIW, that error injection code looks dodgy - it turns the unmount
record into an XFS_LOG transaction type with an invalid log item
type (0). That probably should be flagged as corruption, not be
silently ignored by recovery. IOWs, I think the error injection code
was wrong to begin with - if we want to ensure the log is dirty at
unmount, we should just skip writing the unmount record altogether.

Cheers,

Dave.
Brian Foster March 26, 2020, 11:08 a.m. UTC | #4
On Thu, Mar 26, 2020 at 10:25:00AM +1100, Dave Chinner wrote:
> On Wed, Mar 25, 2020 at 08:33:14AM -0400, Brian Foster wrote:
> > On Tue, Mar 24, 2020 at 06:44:52PM +0100, Christoph Hellwig wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > The xlog_write() function iterates over iclogs until it completes
> > > writing all the log vectors passed in. The ticket tracks whether
> > > a start record has been written or not, so only the first iclog gets
> > > a start record. We only ever pass single use tickets to
> > > xlog_write() so we only ever need to write a start record once per
> > > xlog_write() call.
> > > 
> > > Hence we don't need to store whether we should write a start record
> > > in the ticket as the callers provide all the information we need to
> > > determine if a start record should be written. For the moment, we
> > > have to ensure that we clear the XLOG_TIC_INITED appropriately so
> > > the code in xfs_log_done() still works correctly for committing
> > > transactions.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > [hch: use an need_start_rec bool]
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > ---
> > >  fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
> > >  1 file changed, 32 insertions(+), 31 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > > index 2a90a483c2d6..bf071552094a 100644
> > > --- a/fs/xfs/xfs_log.c
> > > +++ b/fs/xfs/xfs_log.c
> > ...
> > > @@ -2372,25 +2359,29 @@ xlog_write(
> > >  	int			record_cnt = 0;
> > >  	int			data_cnt = 0;
> > >  	int			error = 0;
> > > +	bool			need_start_rec = true;
> > >  
> > >  	*start_lsn = 0;
> > >  
> > > -	len = xlog_write_calc_vec_length(ticket, log_vector);
> > >  
> > >  	/*
> > >  	 * Region headers and bytes are already accounted for.
> > >  	 * We only need to take into account start records and
> > >  	 * split regions in this function.
> > >  	 */
> > > -	if (ticket->t_flags & XLOG_TIC_INITED)
> > > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > > +	if (ticket->t_flags & XLOG_TIC_INITED) {
> > > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > > +		ticket->t_flags &= ~XLOG_TIC_INITED;
> > > +	}
> > >  
> > >  	/*
> > >  	 * Commit record headers need to be accounted for. These
> > >  	 * come in as separate writes so are easy to detect.
> > >  	 */
> > > -	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
> > > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > > +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
> > > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > > +		need_start_rec = false;
> > > +	}
> > 
> > Hmm.. I was asking for a comment update in v1 for this logic change.
> > Looking through it again, what happens here if
> > xfs_log_write_unmount_record() clears the UNMOUNT_TRANS flag for that
> > summary counter check? That looks like a potential behavior change wrt
> > to the start record..
> 
> xfs_log_write_unmount_record() clears the XLOG_TIC_INITED
> flag before calling xlog_write(), so the current code never writes
> out a start record for the unmount record. i.e. the unmount
> record is a single region with the unmount log item in it, and
> AFAICT this code does not change the behaviour of the unmount record
> write at all.
> 

I'm referring to the UNMOUNT_TRANS flag, not t_flags. With this patch,
we actually would write a start record in some cases because
need_start_rec is toggled based on the flags parameter and the summary
counter check zeroes it.

> FWIW, that error injection code looks dodgy - it turns the unmount
> record into an XFS_LOG transaction type with an invalid log item
> type (0). That probably should be flagged as corruption, not be
> silently ignored by recovery. IOWs, I think the error injection code
> was wrong to begin with - if we want to ensure the log is dirty at
> unmount, we should just skip writing the unmount record altogether.
> 

That may be true... TBH I wasn't totally clear on what that logic was
for (it isn't purely error injection). From the commit (f467cad95f5e3)
log, the intent appears to be to "skip writing the unmount record," but
that doesn't quite describe the behavior. Darrick might want to
comment..? If we do revisit this, I'm mainly curious on whether there's
a change in recovery behavior between having this specially crafted
record vs. just writing nothing. For example, does recovery still set
the head/tail based on this record even though we don't mark the log
clean? If so, do we care..?

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Darrick J. Wong March 26, 2020, 3:41 p.m. UTC | #5
On Thu, Mar 26, 2020 at 07:08:28AM -0400, Brian Foster wrote:
> On Thu, Mar 26, 2020 at 10:25:00AM +1100, Dave Chinner wrote:
> > On Wed, Mar 25, 2020 at 08:33:14AM -0400, Brian Foster wrote:
> > > On Tue, Mar 24, 2020 at 06:44:52PM +0100, Christoph Hellwig wrote:
> > > > From: Dave Chinner <dchinner@redhat.com>
> > > > 
> > > > The xlog_write() function iterates over iclogs until it completes
> > > > writing all the log vectors passed in. The ticket tracks whether
> > > > a start record has been written or not, so only the first iclog gets
> > > > a start record. We only ever pass single use tickets to
> > > > xlog_write() so we only ever need to write a start record once per
> > > > xlog_write() call.
> > > > 
> > > > Hence we don't need to store whether we should write a start record
> > > > in the ticket as the callers provide all the information we need to
> > > > determine if a start record should be written. For the moment, we
> > > > have to ensure that we clear the XLOG_TIC_INITED appropriately so
> > > > the code in xfs_log_done() still works correctly for committing
> > > > transactions.
> > > > 
> > > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > > [hch: use an need_start_rec bool]
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > ---
> > > >  fs/xfs/xfs_log.c | 63 ++++++++++++++++++++++++------------------------
> > > >  1 file changed, 32 insertions(+), 31 deletions(-)
> > > > 
> > > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > > > index 2a90a483c2d6..bf071552094a 100644
> > > > --- a/fs/xfs/xfs_log.c
> > > > +++ b/fs/xfs/xfs_log.c
> > > ...
> > > > @@ -2372,25 +2359,29 @@ xlog_write(
> > > >  	int			record_cnt = 0;
> > > >  	int			data_cnt = 0;
> > > >  	int			error = 0;
> > > > +	bool			need_start_rec = true;
> > > >  
> > > >  	*start_lsn = 0;
> > > >  
> > > > -	len = xlog_write_calc_vec_length(ticket, log_vector);
> > > >  
> > > >  	/*
> > > >  	 * Region headers and bytes are already accounted for.
> > > >  	 * We only need to take into account start records and
> > > >  	 * split regions in this function.
> > > >  	 */
> > > > -	if (ticket->t_flags & XLOG_TIC_INITED)
> > > > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > > > +	if (ticket->t_flags & XLOG_TIC_INITED) {
> > > > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > > > +		ticket->t_flags &= ~XLOG_TIC_INITED;
> > > > +	}
> > > >  
> > > >  	/*
> > > >  	 * Commit record headers need to be accounted for. These
> > > >  	 * come in as separate writes so are easy to detect.
> > > >  	 */
> > > > -	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
> > > > -		ticket->t_curr_res -= sizeof(xlog_op_header_t);
> > > > +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
> > > > +		ticket->t_curr_res -= sizeof(struct xlog_op_header);
> > > > +		need_start_rec = false;
> > > > +	}
> > > 
> > > Hmm.. I was asking for a comment update in v1 for this logic change.
> > > Looking through it again, what happens here if
> > > xfs_log_write_unmount_record() clears the UNMOUNT_TRANS flag for that
> > > summary counter check? That looks like a potential behavior change wrt
> > > to the start record..
> > 
> > xfs_log_write_unmount_record() clears the XLOG_TIC_INITED
> > flag before calling xlog_write(), so the current code never writes
> > out a start record for the unmount record. i.e. the unmount
> > record is a single region with the unmount log item in it, and
> > AFAICT this code does not change the behaviour of the unmount record
> > write at all.
> > 
> 
> I'm referring to the UNMOUNT_TRANS flag, not t_flags. With this patch,
> we actually would write a start record in some cases because
> need_start_rec is toggled based on the flags parameter and the summary
> counter check zeroes it.
> 
> > FWIW, that error injection code looks dodgy - it turns the unmount
> > record into an XFS_LOG transaction type with an invalid log item
> > type (0). That probably should be flagged as corruption, not be
> > silently ignored by recovery. IOWs, I think the error injection code
> > was wrong to begin with - if we want to ensure the log is dirty at
> > unmount, we should just skip writing the unmount record altogether.
> > 
> 
> That may be true... TBH I wasn't totally clear on what that logic was
> for (it isn't purely error injection). From the commit (f467cad95f5e3)
> log, the intent appears to be to "skip writing the unmount record," but
> that doesn't quite describe the behavior. Darrick might want to
> comment..? If we do revisit this, I'm mainly curious on whether there's
> a change in recovery behavior between having this specially crafted
> record vs. just writing nothing. For example, does recovery still set
> the head/tail based on this record even though we don't mark the log
> clean? If so, do we care..?

I'll have a look later today, but I think Dave is correct that the
summary recalc force code should bail out of xlog_unmount_write without
writing anything.

It's curious that recovery doesn't complain about this, but at this
point we've potentially been writing out logs with oh_flags == 0...

...though now that I look at xfs_log_recover.c, we don't ever actually
look for XLOG_UNMOUNT_TYPE, do we... heh.

--D

> 
> Brian
> 
> > Cheers,
> > 
> > Dave.
> > -- 
> > Dave Chinner
> > david@fromorbit.com
> > 
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2a90a483c2d6..bf071552094a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2112,23 +2112,21 @@  xlog_print_trans(
 }
 
 /*
- * Calculate the potential space needed by the log vector.  Each region gets
- * its own xlog_op_header_t and may need to be double word aligned.
+ * Calculate the potential space needed by the log vector.  We may need a start
+ * record, and each region gets its own struct xlog_op_header and may need to be
+ * double word aligned.
  */
 static int
 xlog_write_calc_vec_length(
 	struct xlog_ticket	*ticket,
-	struct xfs_log_vec	*log_vector)
+	struct xfs_log_vec	*log_vector,
+	bool			need_start_rec)
 {
 	struct xfs_log_vec	*lv;
-	int			headers = 0;
+	int			headers = need_start_rec ? 1 : 0;
 	int			len = 0;
 	int			i;
 
-	/* acct for start rec of xact */
-	if (ticket->t_flags & XLOG_TIC_INITED)
-		headers++;
-
 	for (lv = log_vector; lv; lv = lv->lv_next) {
 		/* we don't write ordered log vectors */
 		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
@@ -2150,27 +2148,16 @@  xlog_write_calc_vec_length(
 	return len;
 }
 
-/*
- * If first write for transaction, insert start record  We can't be trying to
- * commit if we are inited.  We can't have any "partial_copy" if we are inited.
- */
-static int
+static void
 xlog_write_start_rec(
 	struct xlog_op_header	*ophdr,
 	struct xlog_ticket	*ticket)
 {
-	if (!(ticket->t_flags & XLOG_TIC_INITED))
-		return 0;
-
 	ophdr->oh_tid	= cpu_to_be32(ticket->t_tid);
 	ophdr->oh_clientid = ticket->t_clientid;
 	ophdr->oh_len = 0;
 	ophdr->oh_flags = XLOG_START_TRANS;
 	ophdr->oh_res2 = 0;
-
-	ticket->t_flags &= ~XLOG_TIC_INITED;
-
-	return sizeof(struct xlog_op_header);
 }
 
 static xlog_op_header_t *
@@ -2372,25 +2359,29 @@  xlog_write(
 	int			record_cnt = 0;
 	int			data_cnt = 0;
 	int			error = 0;
+	bool			need_start_rec = true;
 
 	*start_lsn = 0;
 
-	len = xlog_write_calc_vec_length(ticket, log_vector);
 
 	/*
 	 * Region headers and bytes are already accounted for.
 	 * We only need to take into account start records and
 	 * split regions in this function.
 	 */
-	if (ticket->t_flags & XLOG_TIC_INITED)
-		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+	if (ticket->t_flags & XLOG_TIC_INITED) {
+		ticket->t_curr_res -= sizeof(struct xlog_op_header);
+		ticket->t_flags &= ~XLOG_TIC_INITED;
+	}
 
 	/*
 	 * Commit record headers need to be accounted for. These
 	 * come in as separate writes so are easy to detect.
 	 */
-	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
-		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
+		ticket->t_curr_res -= sizeof(struct xlog_op_header);
+		need_start_rec = false;
+	}
 
 	if (ticket->t_curr_res < 0) {
 		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
@@ -2399,6 +2390,8 @@  xlog_write(
 		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 	}
 
+	len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
+
 	index = 0;
 	lv = log_vector;
 	vecp = lv->lv_iovecp;
@@ -2425,7 +2418,6 @@  xlog_write(
 		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 			struct xfs_log_iovec	*reg;
 			struct xlog_op_header	*ophdr;
-			int			start_rec_copy;
 			int			copy_len;
 			int			copy_off;
 			bool			ordered = false;
@@ -2441,11 +2433,15 @@  xlog_write(
 			ASSERT(reg->i_len % sizeof(int32_t) == 0);
 			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
 
-			start_rec_copy = xlog_write_start_rec(ptr, ticket);
-			if (start_rec_copy) {
-				record_cnt++;
+			/*
+			 * Before we start formatting log vectors, we need to
+			 * write a start record. Only do this for the first
+			 * iclog we write to.
+			 */
+			if (need_start_rec) {
+				xlog_write_start_rec(ptr, ticket);
 				xlog_write_adv_cnt(&ptr, &len, &log_offset,
-						   start_rec_copy);
+						sizeof(struct xlog_op_header));
 			}
 
 			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
@@ -2477,8 +2473,13 @@  xlog_write(
 				xlog_write_adv_cnt(&ptr, &len, &log_offset,
 						   copy_len);
 			}
-			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+			copy_len += sizeof(struct xlog_op_header);
 			record_cnt++;
+			if (need_start_rec) {
+				copy_len += sizeof(struct xlog_op_header);
+				record_cnt++;
+				need_start_rec = false;
+			}
 			data_cnt += contwr ? copy_len : 0;
 
 			error = xlog_write_copy_finish(log, iclog, flags,