diff mbox series

[1/8] xfs: hide log iovec alignment constraints

Message ID 20220427022259.695399-2-david@fromorbit.com (mailing list archive)
State Accepted
Headers show
Series xfs: intent whiteouts | expand

Commit Message

Dave Chinner April 27, 2022, 2:22 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Callers currently have to round out the size of buffers to match the
aligment constraints of log iovecs and xlog_write(). They should not
need to know this detail, so introduce a new function to calculate
the iovec length (for use in ->iop_size implementations). Also
modify xlog_finish_iovec() to round up the length to the correct
alignment so the callers don't need to do this, either.

Convert the only user - inode forks - of this alignment rounding to
use the new interface.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_inode_fork.c  | 12 +++---------
 fs/xfs/xfs_inode_item.c         | 25 +++++++------------------
 fs/xfs/xfs_inode_item_recover.c |  4 ++--
 fs/xfs/xfs_log.h                | 23 +++++++++++++++++++++++
 4 files changed, 35 insertions(+), 29 deletions(-)

Comments

Darrick J. Wong April 27, 2022, 3:14 a.m. UTC | #1
On Wed, Apr 27, 2022 at 12:22:52PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Callers currently have to round out the size of buffers to match the
> aligment constraints of log iovecs and xlog_write(). They should not
> need to know this detail, so introduce a new function to calculate
> the iovec length (for use in ->iop_size implementations). Also
> modify xlog_finish_iovec() to round up the length to the correct
> alignment so the callers don't need to do this, either.
> 
> Convert the only user - inode forks - of this alignment rounding to
> use the new interface.

Hmm.  So currently, we require that the inode fork buffer be rounded up
to the next 4 bytes, and then I guess the log will copy that into the
log iovec?  IOWs, if we have a 37-byte data fork, we'll allocate a 40
byte buffer for the xfs_ifork, and the log will copy all 40 bytes into a
40 byte iovec.

Now it looks like we'd allocate a 37-byte buffer for the xfs_ifork, but
the log iovec will still be 40 bytes.  So ... do we copy 37 bytes out of
the ifork buffer and zero the last 3 bytes in the iovec?  Does we leak
kernel memory in those last 3 bytes?  Or do we copy 40 bytes and
overrun?

It sorta looks like (at least for the local format case) xlog_copy_iovec
will copy 37 bytes and leave the last 3 bytes of the iovec in whatever
state it was in previously.  Is that zeroed?  Because it then looks like
xlog_finish_iovec will round that 37 up to 40.

(FWIW I'm just checking for kernel memory exposure vectors here.)

--D

> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_inode_fork.c  | 12 +++---------
>  fs/xfs/xfs_inode_item.c         | 25 +++++++------------------
>  fs/xfs/xfs_inode_item_recover.c |  4 ++--
>  fs/xfs/xfs_log.h                | 23 +++++++++++++++++++++++
>  4 files changed, 35 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index 9aee4a1e2fe9..1a4cdf550f6d 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -36,7 +36,7 @@ xfs_init_local_fork(
>  	int64_t			size)
>  {
>  	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
> -	int			mem_size = size, real_size = 0;
> +	int			mem_size = size;
>  	bool			zero_terminate;
>  
>  	/*
> @@ -50,8 +50,7 @@ xfs_init_local_fork(
>  		mem_size++;
>  
>  	if (size) {
> -		real_size = roundup(mem_size, 4);
> -		ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
> +		ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS);
>  		memcpy(ifp->if_u1.if_data, data, size);
>  		if (zero_terminate)
>  			ifp->if_u1.if_data[size] = '\0';
> @@ -497,12 +496,7 @@ xfs_idata_realloc(
>  		return;
>  	}
>  
> -	/*
> -	 * For inline data, the underlying buffer must be a multiple of 4 bytes
> -	 * in size so that it can be logged and stay on word boundaries.
> -	 * We enforce that here.
> -	 */
> -	ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
> +	ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size,
>  				      GFP_NOFS | __GFP_NOFAIL);
>  	ifp->if_bytes = new_size;
>  }
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index 00733a18ccdc..721def0639fd 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -71,7 +71,7 @@ xfs_inode_item_data_fork_size(
>  	case XFS_DINODE_FMT_LOCAL:
>  		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
>  		    ip->i_df.if_bytes > 0) {
> -			*nbytes += roundup(ip->i_df.if_bytes, 4);
> +			*nbytes += xlog_calc_iovec_len(ip->i_df.if_bytes);
>  			*nvecs += 1;
>  		}
>  		break;
> @@ -112,7 +112,7 @@ xfs_inode_item_attr_fork_size(
>  	case XFS_DINODE_FMT_LOCAL:
>  		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
>  		    ip->i_afp->if_bytes > 0) {
> -			*nbytes += roundup(ip->i_afp->if_bytes, 4);
> +			*nbytes += xlog_calc_iovec_len(ip->i_afp->if_bytes);
>  			*nvecs += 1;
>  		}
>  		break;
> @@ -204,17 +204,12 @@ xfs_inode_item_format_data_fork(
>  			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
>  		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
>  		    ip->i_df.if_bytes > 0) {
> -			/*
> -			 * Round i_bytes up to a word boundary.
> -			 * The underlying memory is guaranteed
> -			 * to be there by xfs_idata_realloc().
> -			 */
> -			data_bytes = roundup(ip->i_df.if_bytes, 4);
>  			ASSERT(ip->i_df.if_u1.if_data != NULL);
>  			ASSERT(ip->i_disk_size > 0);
>  			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
> -					ip->i_df.if_u1.if_data, data_bytes);
> -			ilf->ilf_dsize = (unsigned)data_bytes;
> +					ip->i_df.if_u1.if_data,
> +					ip->i_df.if_bytes);
> +			ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes;
>  			ilf->ilf_size++;
>  		} else {
>  			iip->ili_fields &= ~XFS_ILOG_DDATA;
> @@ -288,17 +283,11 @@ xfs_inode_item_format_attr_fork(
>  
>  		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
>  		    ip->i_afp->if_bytes > 0) {
> -			/*
> -			 * Round i_bytes up to a word boundary.
> -			 * The underlying memory is guaranteed
> -			 * to be there by xfs_idata_realloc().
> -			 */
> -			data_bytes = roundup(ip->i_afp->if_bytes, 4);
>  			ASSERT(ip->i_afp->if_u1.if_data != NULL);
>  			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
>  					ip->i_afp->if_u1.if_data,
> -					data_bytes);
> -			ilf->ilf_asize = (unsigned)data_bytes;
> +					ip->i_afp->if_bytes);
> +			ilf->ilf_asize = (unsigned)ip->i_afp->if_bytes;
>  			ilf->ilf_size++;
>  		} else {
>  			iip->ili_fields &= ~XFS_ILOG_ADATA;
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> index 6d44f5fd6d7e..d28ffaebd067 100644
> --- a/fs/xfs/xfs_inode_item_recover.c
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -462,7 +462,7 @@ xlog_recover_inode_commit_pass2(
>  	ASSERT(in_f->ilf_size <= 4);
>  	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
>  	ASSERT(!(fields & XFS_ILOG_DFORK) ||
> -	       (len == in_f->ilf_dsize));
> +	       (len == xlog_calc_iovec_len(in_f->ilf_dsize)));
>  
>  	switch (fields & XFS_ILOG_DFORK) {
>  	case XFS_ILOG_DDATA:
> @@ -497,7 +497,7 @@ xlog_recover_inode_commit_pass2(
>  		}
>  		len = item->ri_buf[attr_index].i_len;
>  		src = item->ri_buf[attr_index].i_addr;
> -		ASSERT(len == in_f->ilf_asize);
> +		ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize));
>  
>  		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
>  		case XFS_ILOG_ADATA:
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index 8dafe8f771c7..d9aebca3f971 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -21,6 +21,17 @@ struct xfs_log_vec {
>  
>  #define XFS_LOG_VEC_ORDERED	(-1)
>  
> +/*
> + * Calculate the log iovec length for a given user buffer length. Intended to be
> + * used by ->iop_size implementations when sizing buffers of arbitrary
> + * alignments.
> + */
> +static inline int
> +xlog_calc_iovec_len(int len)
> +{
> +	return roundup(len, 4);
> +}
> +
>  void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
>  		uint type);
>  
> @@ -29,6 +40,12 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
>  {
>  	struct xlog_op_header	*oph = vec->i_addr;
>  
> +	/*
> +	 * Always round up the length to the correct alignment so callers don't
> +	 * need to know anything about this log vec layout requirement.
> +	 */
> +	len = xlog_calc_iovec_len(len);
> +
>  	/* opheader tracks payload length, logvec tracks region length */
>  	oph->oh_len = cpu_to_be32(len);
>  
> @@ -36,8 +53,14 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
>  	lv->lv_buf_len += len;
>  	lv->lv_bytes += len;
>  	vec->i_len = len;
> +
> +	/* Catch buffer overruns */
> +	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
>  }
>  
> +/*
> + * Copy the amount of data requested by the caller into a new log iovec.
> + */
>  static inline void *
>  xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
>  		uint type, void *data, int len)
> -- 
> 2.35.1
>
Dave Chinner April 27, 2022, 4:50 a.m. UTC | #2
On Tue, Apr 26, 2022 at 08:14:45PM -0700, Darrick J. Wong wrote:
> On Wed, Apr 27, 2022 at 12:22:52PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Callers currently have to round out the size of buffers to match the
> > aligment constraints of log iovecs and xlog_write(). They should not
> > need to know this detail, so introduce a new function to calculate
> > the iovec length (for use in ->iop_size implementations). Also
> > modify xlog_finish_iovec() to round up the length to the correct
> > alignment so the callers don't need to do this, either.
> > 
> > Convert the only user - inode forks - of this alignment rounding to
> > use the new interface.
> 
> Hmm.  So currently, we require that the inode fork buffer be rounded up
> to the next 4 bytes, and then I guess the log will copy that into the
> log iovec?  IOWs, if we have a 37-byte data fork, we'll allocate a 40
> byte buffer for the xfs_ifork, and the log will copy all 40 bytes into a
> 40 byte iovec.

Yes, that's how the current code works. It ends up leaking whatever
was in those 3 bytes into the shadow buffer that we then copy into
the log region. i.e. the existing code "leaks" non-zeroed allocated
memory to the journal.

> Now it looks like we'd allocate a 37-byte buffer for the xfs_ifork, but
> the log iovec will still be 40 bytes.  So ... do we copy 37 bytes out of
> the ifork buffer and zero the last 3 bytes in the iovec?

Yes, we copy 37 bytes out of the ifork buffer now into the shadow
buffer so we do not overrun the inode fork buffer.

> Does we leak
> kernel memory in those last 3 bytes?

We does indeed still leak the remaining 3 bytes as they are not
zeroed.

> Or do we copy 40 bytes and
> overrun?

No, we definitely don't do that - KASAN gets very unhappy when you
do that...

> It sorta looks like (at least for the local format case) xlog_copy_iovec
> will copy 37 bytes and leave the last 3 bytes of the iovec in whatever
> state it was in previously.  Is that zeroed?  Because it then looks like
> xlog_finish_iovec will round that 37 up to 40.

The shadow buffer is only partially zeroed - the part that makes io
the header and iovec pointer array is zeroed, but the region that
the journal data is written to is not zeroed.

> (FWIW I'm just checking for kernel memory exposure vectors here.)

Yup, I hadn't even considered that aspect of the code because we
aren't actually leaking anything to userspace. If an unprivileged
user can read 3 bytes of uninitialised data out of the journal we've
got much, much bigger security problems to deal with.

It should be trivial to fix, though. I'll do the initial fix as a
standalone patch, though, and then roll it into this one because the
problem has been around for a long while and fixing this patch
doesn't produce an easily backportable fix...

Cheers,

Dave.
Darrick J. Wong April 27, 2022, 4:45 p.m. UTC | #3
On Wed, Apr 27, 2022 at 02:50:34PM +1000, Dave Chinner wrote:
> On Tue, Apr 26, 2022 at 08:14:45PM -0700, Darrick J. Wong wrote:
> > On Wed, Apr 27, 2022 at 12:22:52PM +1000, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > Callers currently have to round out the size of buffers to match the
> > > aligment constraints of log iovecs and xlog_write(). They should not
> > > need to know this detail, so introduce a new function to calculate
> > > the iovec length (for use in ->iop_size implementations). Also
> > > modify xlog_finish_iovec() to round up the length to the correct
> > > alignment so the callers don't need to do this, either.
> > > 
> > > Convert the only user - inode forks - of this alignment rounding to
> > > use the new interface.
> > 
> > Hmm.  So currently, we require that the inode fork buffer be rounded up
> > to the next 4 bytes, and then I guess the log will copy that into the
> > log iovec?  IOWs, if we have a 37-byte data fork, we'll allocate a 40
> > byte buffer for the xfs_ifork, and the log will copy all 40 bytes into a
> > 40 byte iovec.
> 
> Yes, that's how the current code works. It ends up leaking whatever
> was in those 3 bytes into the shadow buffer that we then copy into
> the log region. i.e. the existing code "leaks" non-zeroed allocated
> memory to the journal.
> 
> > Now it looks like we'd allocate a 37-byte buffer for the xfs_ifork, but
> > the log iovec will still be 40 bytes.  So ... do we copy 37 bytes out of
> > the ifork buffer and zero the last 3 bytes in the iovec?
> 
> Yes, we copy 37 bytes out of the ifork buffer now into the shadow
> buffer so we do not overrun the inode fork buffer.
> 
> > Does we leak
> > kernel memory in those last 3 bytes?
> 
> We does indeed still leak the remaining 3 bytes as they are not
> zeroed.
> 
> > Or do we copy 40 bytes and
> > overrun?
> 
> No, we definitely don't do that - KASAN gets very unhappy when you
> do that...
> 
> > It sorta looks like (at least for the local format case) xlog_copy_iovec
> > will copy 37 bytes and leave the last 3 bytes of the iovec in whatever
> > state it was in previously.  Is that zeroed?  Because it then looks like
> > xlog_finish_iovec will round that 37 up to 40.
> 
> The shadow buffer is only partially zeroed - the part that makes io
> the header and iovec pointer array is zeroed, but the region that
> the journal data is written to is not zeroed.
> 
> > (FWIW I'm just checking for kernel memory exposure vectors here.)
> 
> Yup, I hadn't even considered that aspect of the code because we
> aren't actually leaking anything to userspace. If an unprivileged
> user can read 3 bytes of uninitialised data out of the journal we've
> got much, much bigger security problems to deal with.
> 
> It should be trivial to fix, though. I'll do the initial fix as a
> standalone patch, though, and then roll it into this one because the
> problem has been around for a long while and fixing this patch
> doesn't produce an easily backportable fix...

<nod> I agree that it's a very minor disclosure vulnerability (certainly
less severe than ALLOCSP) since you'd need CAP_SYS_RAWIO to exploit it.
But certainly worth patching before someone discovers that a former
pagecache page with your credit card numbers on it got recycled into a
log vector page.  Thanks for doing the fix. :)

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Christoph Hellwig April 28, 2022, 1 p.m. UTC | #4
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 9aee4a1e2fe9..1a4cdf550f6d 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -36,7 +36,7 @@  xfs_init_local_fork(
 	int64_t			size)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
-	int			mem_size = size, real_size = 0;
+	int			mem_size = size;
 	bool			zero_terminate;
 
 	/*
@@ -50,8 +50,7 @@  xfs_init_local_fork(
 		mem_size++;
 
 	if (size) {
-		real_size = roundup(mem_size, 4);
-		ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
+		ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS);
 		memcpy(ifp->if_u1.if_data, data, size);
 		if (zero_terminate)
 			ifp->if_u1.if_data[size] = '\0';
@@ -497,12 +496,7 @@  xfs_idata_realloc(
 		return;
 	}
 
-	/*
-	 * For inline data, the underlying buffer must be a multiple of 4 bytes
-	 * in size so that it can be logged and stay on word boundaries.
-	 * We enforce that here.
-	 */
-	ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
+	ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size,
 				      GFP_NOFS | __GFP_NOFAIL);
 	ifp->if_bytes = new_size;
 }
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 00733a18ccdc..721def0639fd 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -71,7 +71,7 @@  xfs_inode_item_data_fork_size(
 	case XFS_DINODE_FMT_LOCAL:
 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 		    ip->i_df.if_bytes > 0) {
-			*nbytes += roundup(ip->i_df.if_bytes, 4);
+			*nbytes += xlog_calc_iovec_len(ip->i_df.if_bytes);
 			*nvecs += 1;
 		}
 		break;
@@ -112,7 +112,7 @@  xfs_inode_item_attr_fork_size(
 	case XFS_DINODE_FMT_LOCAL:
 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
 		    ip->i_afp->if_bytes > 0) {
-			*nbytes += roundup(ip->i_afp->if_bytes, 4);
+			*nbytes += xlog_calc_iovec_len(ip->i_afp->if_bytes);
 			*nvecs += 1;
 		}
 		break;
@@ -204,17 +204,12 @@  xfs_inode_item_format_data_fork(
 			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 		    ip->i_df.if_bytes > 0) {
-			/*
-			 * Round i_bytes up to a word boundary.
-			 * The underlying memory is guaranteed
-			 * to be there by xfs_idata_realloc().
-			 */
-			data_bytes = roundup(ip->i_df.if_bytes, 4);
 			ASSERT(ip->i_df.if_u1.if_data != NULL);
 			ASSERT(ip->i_disk_size > 0);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
-					ip->i_df.if_u1.if_data, data_bytes);
-			ilf->ilf_dsize = (unsigned)data_bytes;
+					ip->i_df.if_u1.if_data,
+					ip->i_df.if_bytes);
+			ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes;
 			ilf->ilf_size++;
 		} else {
 			iip->ili_fields &= ~XFS_ILOG_DDATA;
@@ -288,17 +283,11 @@  xfs_inode_item_format_attr_fork(
 
 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
 		    ip->i_afp->if_bytes > 0) {
-			/*
-			 * Round i_bytes up to a word boundary.
-			 * The underlying memory is guaranteed
-			 * to be there by xfs_idata_realloc().
-			 */
-			data_bytes = roundup(ip->i_afp->if_bytes, 4);
 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
 					ip->i_afp->if_u1.if_data,
-					data_bytes);
-			ilf->ilf_asize = (unsigned)data_bytes;
+					ip->i_afp->if_bytes);
+			ilf->ilf_asize = (unsigned)ip->i_afp->if_bytes;
 			ilf->ilf_size++;
 		} else {
 			iip->ili_fields &= ~XFS_ILOG_ADATA;
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 6d44f5fd6d7e..d28ffaebd067 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -462,7 +462,7 @@  xlog_recover_inode_commit_pass2(
 	ASSERT(in_f->ilf_size <= 4);
 	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
 	ASSERT(!(fields & XFS_ILOG_DFORK) ||
-	       (len == in_f->ilf_dsize));
+	       (len == xlog_calc_iovec_len(in_f->ilf_dsize)));
 
 	switch (fields & XFS_ILOG_DFORK) {
 	case XFS_ILOG_DDATA:
@@ -497,7 +497,7 @@  xlog_recover_inode_commit_pass2(
 		}
 		len = item->ri_buf[attr_index].i_len;
 		src = item->ri_buf[attr_index].i_addr;
-		ASSERT(len == in_f->ilf_asize);
+		ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize));
 
 		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
 		case XFS_ILOG_ADATA:
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 8dafe8f771c7..d9aebca3f971 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -21,6 +21,17 @@  struct xfs_log_vec {
 
 #define XFS_LOG_VEC_ORDERED	(-1)
 
+/*
+ * Calculate the log iovec length for a given user buffer length. Intended to be
+ * used by ->iop_size implementations when sizing buffers of arbitrary
+ * alignments.
+ */
+static inline int
+xlog_calc_iovec_len(int len)
+{
+	return roundup(len, 4);
+}
+
 void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 		uint type);
 
@@ -29,6 +40,12 @@  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 {
 	struct xlog_op_header	*oph = vec->i_addr;
 
+	/*
+	 * Always round up the length to the correct alignment so callers don't
+	 * need to know anything about this log vec layout requirement.
+	 */
+	len = xlog_calc_iovec_len(len);
+
 	/* opheader tracks payload length, logvec tracks region length */
 	oph->oh_len = cpu_to_be32(len);
 
@@ -36,8 +53,14 @@  xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 	lv->lv_buf_len += len;
 	lv->lv_bytes += len;
 	vec->i_len = len;
+
+	/* Catch buffer overruns */
+	ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
 }
 
+/*
+ * Copy the amount of data requested by the caller into a new log iovec.
+ */
 static inline void *
 xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 		uint type, void *data, int len)