[v6,1/7] fscrypt: Add functions for direct I/O support
diff mbox series

Message ID 20200724184501.1651378-2-satyat@google.com
State New
Headers show
Series
  • add support for direct I/O with fscrypt using blk-crypto
Related show

Commit Message

Satya Tangirala July 24, 2020, 6:44 p.m. UTC
From: Eric Biggers <ebiggers@google.com>

Introduce fscrypt_dio_supported() to check whether a direct I/O request
is unsupported due to encryption constraints.

Also introduce fscrypt_limit_io_blocks() to limit how many blocks can be
added to a bio being prepared for direct I/O. This is needed for
filesystems that use the iomap direct I/O implementation to avoid DUN
wraparound in the middle of a bio (which is possible with the
IV_INO_LBLK_32 IV generation method). Elsewhere fscrypt_mergeable_bio()
is used for this, but iomap operates on logical ranges directly, so
filesystems using iomap won't have a chance to call fscrypt_mergeable_bio()
on every block added to a bio. So we need this function which limits a
logical range in one go.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Co-developed-by: Satya Tangirala <satyat@google.com>
Signed-off-by: Satya Tangirala <satyat@google.com>
---
 fs/crypto/crypto.c       |  8 +++++
 fs/crypto/inline_crypt.c | 74 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fscrypt.h  | 18 ++++++++++
 3 files changed, 100 insertions(+)

Comments

Dave Chinner July 25, 2020, 12:14 a.m. UTC | #1
On Fri, Jul 24, 2020 at 06:44:55PM +0000, Satya Tangirala wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Introduce fscrypt_dio_supported() to check whether a direct I/O request
> is unsupported due to encryption constraints.
> 
> Also introduce fscrypt_limit_io_blocks() to limit how many blocks can be
> added to a bio being prepared for direct I/O. This is needed for
> filesystems that use the iomap direct I/O implementation to avoid DUN
> wraparound in the middle of a bio (which is possible with the
> IV_INO_LBLK_32 IV generation method). Elsewhere fscrypt_mergeable_bio()
> is used for this, but iomap operates on logical ranges directly, so
> filesystems using iomap won't have a chance to call fscrypt_mergeable_bio()
> on every block added to a bio. So we need this function which limits a
> logical range in one go.
> 
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> Co-developed-by: Satya Tangirala <satyat@google.com>
> Signed-off-by: Satya Tangirala <satyat@google.com>
> ---
>  fs/crypto/crypto.c       |  8 +++++
>  fs/crypto/inline_crypt.c | 74 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/fscrypt.h  | 18 ++++++++++
>  3 files changed, 100 insertions(+)
> 
> diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
> index 9212325763b0..f72f22a718b2 100644
> --- a/fs/crypto/crypto.c
> +++ b/fs/crypto/crypto.c
> @@ -69,6 +69,14 @@ void fscrypt_free_bounce_page(struct page *bounce_page)
>  }
>  EXPORT_SYMBOL(fscrypt_free_bounce_page);
>  
> +/*
> + * Generate the IV for the given logical block number within the given file.
> + * For filenames encryption, lblk_num == 0.
> + *
> + * Keep this in sync with fscrypt_limit_io_blocks().  fscrypt_limit_io_blocks()
> + * needs to know about any IV generation methods where the low bits of IV don't
> + * simply contain the lblk_num (e.g., IV_INO_LBLK_32).
> + */
>  void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
>  			 const struct fscrypt_info *ci)
>  {
> diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
> index d7aecadf33c1..4cdf807b89b9 100644
> --- a/fs/crypto/inline_crypt.c
> +++ b/fs/crypto/inline_crypt.c
> @@ -16,6 +16,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/buffer_head.h>
>  #include <linux/sched/mm.h>
> +#include <linux/uio.h>
>  
>  #include "fscrypt_private.h"
>  
> @@ -362,3 +363,76 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio,
>  	return fscrypt_mergeable_bio(bio, inode, next_lblk);
>  }
>  EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
> +
> +/**
> + * fscrypt_dio_supported() - check whether a direct I/O request is unsupported
> + *			     due to encryption constraints
> + * @iocb: the file and position the I/O is targeting
> + * @iter: the I/O data segment(s)
> + *
> + * Return: true if direct I/O is supported
> + */
> +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
> +{
> +	const struct inode *inode = file_inode(iocb->ki_filp);
> +	const unsigned int blocksize = i_blocksize(inode);
> +
> +	/* If the file is unencrypted, no veto from us. */
> +	if (!fscrypt_needs_contents_encryption(inode))
> +		return true;
> +
> +	/* We only support direct I/O with inline crypto, not fs-layer crypto */
> +	if (!fscrypt_inode_uses_inline_crypto(inode))
> +		return false;
> +
> +	/*
> +	 * Since the granularity of encryption is filesystem blocks, the I/O
> +	 * must be block aligned -- not just disk sector aligned.
> +	 */
> +	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
> +		return false;

Doesn't this force user buffers to be filesystem block size aligned,
instead of 512 byte aligned as is typical for direct IO?

That's going to cause applications that work fine on normal
filesystems because they memalign() buffers to 512 bytes or logical
block device sector sizes (as per the open(2) man page) to fail on
encrypted volumes, and it's not going to be obvious to users as to
why this happens.

XFS has XFS_IOC_DIOINFO to expose exactly this information to
userspace on a per-file basis. Other filesystem and VFS developers
have said for the past 15 years "we don't need no stinking DIOINFO".
The same people shot down adding optional IO alignment
constraint fields to statx() a few years ago, too.

Yet here we are again, with alignment of DIO buffers being an
issue that userspace needs to know about....

Cheers,

Dave.
Eric Biggers July 26, 2020, 2:49 a.m. UTC | #2
On Sat, Jul 25, 2020 at 10:14:41AM +1000, Dave Chinner wrote:
> > +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
> > +{
> > +	const struct inode *inode = file_inode(iocb->ki_filp);
> > +	const unsigned int blocksize = i_blocksize(inode);
> > +
> > +	/* If the file is unencrypted, no veto from us. */
> > +	if (!fscrypt_needs_contents_encryption(inode))
> > +		return true;
> > +
> > +	/* We only support direct I/O with inline crypto, not fs-layer crypto */
> > +	if (!fscrypt_inode_uses_inline_crypto(inode))
> > +		return false;
> > +
> > +	/*
> > +	 * Since the granularity of encryption is filesystem blocks, the I/O
> > +	 * must be block aligned -- not just disk sector aligned.
> > +	 */
> > +	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
> > +		return false;
> 
> Doesn't this force user buffers to be filesystem block size aligned,
> instead of 512 byte aligned as is typical for direct IO?
> 
> That's going to cause applications that work fine on normal
> > filesystems because they memalign() buffers to 512 bytes or logical
> block device sector sizes (as per the open(2) man page) to fail on
> encrypted volumes, and it's not going to be obvious to users as to
> why this happens.

The status quo is that direct I/O on encrypted files falls back to buffered I/O.

So this patch is strictly an improvement; it's making direct I/O work in a case
where previously it didn't work.

> 
> XFS has XFS_IOC_DIOINFO to expose exactly this information to
> userspace on a per-file basis. Other filesystem and VFS developers
> have said for the past 15 years "we don't need no stinking DIOINFO".
> The same people shot down adding optional IO alignment
> constraint fields to statx() a few years ago, too.
> 
> Yet here we are again, with alignment of DIO buffers being an
> issue that userspace needs to know about....
> 

A DIOINFO ioctl sounds like a good idea to me, although I'm not familiar with
previous discussions about it.

Note that there are lots of other cases where ext4 and f2fs fall back to
buffered I/O; see ext4_dio_supported() and f2fs_force_buffered_io().  So this
isn't a new problem.

- Eric
Dave Chinner July 27, 2020, 12:58 a.m. UTC | #3
On Sat, Jul 25, 2020 at 07:49:20PM -0700, Eric Biggers wrote:
> On Sat, Jul 25, 2020 at 10:14:41AM +1000, Dave Chinner wrote:
> > > +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
> > > +{
> > > +	const struct inode *inode = file_inode(iocb->ki_filp);
> > > +	const unsigned int blocksize = i_blocksize(inode);
> > > +
> > > +	/* If the file is unencrypted, no veto from us. */
> > > +	if (!fscrypt_needs_contents_encryption(inode))
> > > +		return true;
> > > +
> > > +	/* We only support direct I/O with inline crypto, not fs-layer crypto */
> > > +	if (!fscrypt_inode_uses_inline_crypto(inode))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * Since the granularity of encryption is filesystem blocks, the I/O
> > > +	 * must be block aligned -- not just disk sector aligned.
> > > +	 */
> > > +	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
> > > +		return false;
> > 
> > Doesn't this force user buffers to be filesystem block size aligned,
> > instead of 512 byte aligned as is typical for direct IO?
> > 
> > That's going to cause applications that work fine on normal
> > > filesystems because they memalign() buffers to 512 bytes or logical
> > block device sector sizes (as per the open(2) man page) to fail on
> > encrypted volumes, and it's not going to be obvious to users as to
> > why this happens.
> 
> The status quo is that direct I/O on encrypted files falls back to buffered I/O.

Largely irrelevant.

You claimed in another thread that performance is a key feature that
inline encryption + DIO provides. Now you're implying that failing
to provide that performance doesn't really matter at all.

> So this patch is strictly an improvement; it's making direct I/O work in a case
> where previously it didn't work.

Improvements still need to follow longstanding conventions. And,
IMO, it's not an improvement if the feature results in 
unpredictable performance for userspace applications.

i.e. there is no point in enabling direct IO if it is unpredictably
going to fall back to the buffered IO path when applications are
coded to the guidelines the man page said they should use. Such
problems are an utter PITA to diagnose in the field, and on those
grounds alone the current implementation gets a NACK.

> Note that there are lots of other cases where ext4 and f2fs fall back to
> buffered I/O; see ext4_dio_supported() and f2fs_force_buffered_io().  So this
> isn't a new problem.

No shit, sherlock. But that's also irrelevant to the discussion at
hand - claiming "we can fall back to buffered IO" doesn't address
the problem I've raised. It's just an excuse for not fixing it.

Indeed, the problem is easy to fix - fscrypt only cares that the
user IO offset and length is DUN aligned.  fscrypt does not care
that the user memory buffer is filesystem block aligned - user
memory buffer alignment is an underlying hardware DMA constraint -
and so fscrypt_dio_supported() needs to relax or remove the user
memory buffer alignment constraint so that it follows existing
conventions....

Cheers,

Dave.
Eric Biggers July 27, 2020, 2:59 a.m. UTC | #4
On Mon, Jul 27, 2020 at 10:58:48AM +1000, Dave Chinner wrote:
> On Sat, Jul 25, 2020 at 07:49:20PM -0700, Eric Biggers wrote:
> > On Sat, Jul 25, 2020 at 10:14:41AM +1000, Dave Chinner wrote:
> > > > +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
> > > > +{
> > > > +	const struct inode *inode = file_inode(iocb->ki_filp);
> > > > +	const unsigned int blocksize = i_blocksize(inode);
> > > > +
> > > > +	/* If the file is unencrypted, no veto from us. */
> > > > +	if (!fscrypt_needs_contents_encryption(inode))
> > > > +		return true;
> > > > +
> > > > +	/* We only support direct I/O with inline crypto, not fs-layer crypto */
> > > > +	if (!fscrypt_inode_uses_inline_crypto(inode))
> > > > +		return false;
> > > > +
> > > > +	/*
> > > > +	 * Since the granularity of encryption is filesystem blocks, the I/O
> > > > +	 * must be block aligned -- not just disk sector aligned.
> > > > +	 */
> > > > +	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
> > > > +		return false;
> > > 
> > > Doesn't this force user buffers to be filesystem block size aligned,
> > > instead of 512 byte aligned as is typical for direct IO?
> > > 
> > > That's going to cause applications that work fine on normal
> > > > filesystems because they memalign() buffers to 512 bytes or logical
> > > block device sector sizes (as per the open(2) man page) to fail on
> > > encrypted volumes, and it's not going to be obvious to users as to
> > > why this happens.
> > 
> > The status quo is that direct I/O on encrypted files falls back to buffered I/O.
> 
> Largely irrelevant.
> 
> You claimed in another thread that performance is a key feature that
> inline encryption + DIO provides. Now you're implying that failing
> to provide that performance doesn't really matter at all.
> 
> > So this patch is strictly an improvement; it's making direct I/O work in a case
> > where previously it didn't work.
> 
> Improvements still need to follow longstanding conventions. And,
> IMO, it's not an improvement if the feature results in 
> unpredictable performance for userspace applications.
> 
> i.e. there is no point in enabling direct IO if it is unpredictably
> going to fall back to the buffered IO path when applications are
> coded to the guidelines the man page said they should use. Such
> problems are an utter PITA to diagnose in the field, and on those
> grounds alone the current implementation gets a NACK.
> 
> > Note that there are lots of other cases where ext4 and f2fs fall back to
> > buffered I/O; see ext4_dio_supported() and f2fs_force_buffered_io().  So this
> > isn't a new problem.
> 
> No shit, sherlock. But that's also irrelevant to the discussion at
> hand - claiming "we can fall back to buffered IO" doesn't address
> the problem I've raised. It's just an excuse for not fixing it.

Actually we never specifically discussed the motivation for DIO on encrypted
files, but yes there are some specific applications that need it for performance
reasons (e.g., zram writeback to a loop device backed by an encrypted file), as
well as benchmarking applications.  These applications aren't expected to have
much trouble (if any) dealing with a fs blocksize alignment requirement.

We always try to make encrypted files behave just like unencrypted files, but
sometimes it's just not possible to do so.  We document the exceptions in
Documentation/filesystems/fscrypt.rst, which this patchset updates to document
the conditions for direct I/O working.  Note that these conditions include more
than just the alignment requirement.

The open() man page does mention that O_DIRECT I/O typically needs to be aligned
to logical_block_size; however it also says "In Linux alignment restrictions
vary by filesystem and kernel version and might be absent entirely."

The other examples of falling back to buffered I/O are relevant, since they show
that similar issues are already being dealt with in the (rare) use cases of
O_DIRECT.  So I don't think the convention is as strong as you think it is...

> Indeed, the problem is easy to fix - fscrypt only cares that the
> user IO offset and length is DUN aligned.  fscrypt does not care
> that the user memory buffer is filesystem block aligned - user
> memory buffer alignment is an underlying hardware DMA constraint -
> and so fscrypt_dio_supported() needs to relax or remove the user
> memory buffer alignment constraint so that it follows existing
> conventions....

Relaxing the user buffer alignment requirement would mean that a single
encryption data unit could be discontiguous in memory.  I'm not sure that's
allowed -- it *might* be, but we'd have to verify it on every vendor's inline
encryption hardware, as well as handle this case in block/blk-crypto-fallback.c.
It's much easier to just require proper alignment.

Also, would relaxing the user buffer alignment really address your concern,
given that the file offset and length would still have to be fs-block aligned?
Applications might also align the offset and length to logical_block_size only.

So I don't see how this is "easy to fix" at all, other than by limiting direct
I/O support to data_unit_size == logical_block_size (which we could do for now
if it gets you to stop nacking the DIO patches, though I'm pretty sure that
restriction won't work for some people so would need to be re-visited later...).

- Eric
Dave Chinner July 27, 2020, 4:47 a.m. UTC | #5
On Sun, Jul 26, 2020 at 07:59:46PM -0700, Eric Biggers wrote:
> On Mon, Jul 27, 2020 at 10:58:48AM +1000, Dave Chinner wrote:
> > On Sat, Jul 25, 2020 at 07:49:20PM -0700, Eric Biggers wrote:
> > > On Sat, Jul 25, 2020 at 10:14:41AM +1000, Dave Chinner wrote:
> > > > > +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
> > > > > +{
> > > > > +	const struct inode *inode = file_inode(iocb->ki_filp);
> > > > > +	const unsigned int blocksize = i_blocksize(inode);
> > > > > +
> > > > > +	/* If the file is unencrypted, no veto from us. */
> > > > > +	if (!fscrypt_needs_contents_encryption(inode))
> > > > > +		return true;
> > > > > +
> > > > > +	/* We only support direct I/O with inline crypto, not fs-layer crypto */
> > > > > +	if (!fscrypt_inode_uses_inline_crypto(inode))
> > > > > +		return false;
> > > > > +
> > > > > +	/*
> > > > > +	 * Since the granularity of encryption is filesystem blocks, the I/O
> > > > > +	 * must be block aligned -- not just disk sector aligned.
> > > > > +	 */
> > > > > +	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
> > > > > +		return false;
> > > > 
> > > > Doesn't this force user buffers to be filesystem block size aligned,
> > > > instead of 512 byte aligned as is typical for direct IO?
> > > > 
> > > > That's going to cause applications that work fine on normal
> > > > > filesystems because they memalign() buffers to 512 bytes or logical
> > > > block device sector sizes (as per the open(2) man page) to fail on
> > > > encrypted volumes, and it's not going to be obvious to users as to
> > > > why this happens.
> > > 
> > > The status quo is that direct I/O on encrypted files falls back to buffered I/O.
> > 
> > Largely irrelevant.
> > 
> > You claimed in another thread that performance is a key feature that
> > inline encryption + DIO provides. Now you're implying that failing
> > to provide that performance doesn't really matter at all.
> > 
> > > So this patch is strictly an improvement; it's making direct I/O work in a case
> > > where previously it didn't work.
> > 
> > Improvements still need to follow longstanding conventions. And,
> > IMO, it's not an improvement if the feature results in 
> > unpredictable performance for userspace applications.
.....

> The open() man page does mention that O_DIRECT I/O typically needs to be aligned
> to logical_block_size; however it also says "In Linux alignment restrictions
> vary by filesystem and kernel version and might be absent entirely."

Now you are just language-lawyering. I'll quote from the next
paragraph in the man page:

	"Since Linux 2.6.0, alignment to the logical block size of
	the underlying storage (typically 512 bytes) suffices. The
	logical block size can be determined using the ioctl(2)
	BLKSSZGET operation [...]"

There's the longstanding convention I've been talking about, clearly
spelled out. Applications that follow this convention (and there are
lots) should just work. Having code that works correctly on one file
but not on another -on the same filesystem- despite doing all the
right things is not at all user friendly.

What I really care about is that new functionality works without
requiring applications to be rewritten to cater specifically for
some wacky foible in a new feature.

fscrypt is generic functionality and hardware acceleration of crypt
functions are only going to get more common in future. Hence the
combination of the two needs to *play nicely* with the vast library
of existing userspace software that is already out there.

We need to get this stuff correct right from the start, otherwise
we're just leaving a huge mountain of technical debt for our future
selves to have to clean up.

> The other examples of falling back to buffered I/O are relevant, since they show
> that similar issues are already being dealt with in the (rare) use cases of
> O_DIRECT.  So I don't think the convention is as strong as you think it is...

The convention is there so that applications that *expect* to be
using direct IO can do so -reliably-. Breaking conventions that
people have become accustomed to just because it is convenient for
you is a pretty damn selfish act.

> > Indeed, the problem is easy to fix - fscrypt only cares that the
> > user IO offset and length is DUN aligned.  fscrypt does not care
> > that the user memory buffer is filesystem block aligned - user
> > memory buffer alignment is an underlying hardware DMA constraint -
> > and so fscrypt_dio_supported() needs to relax or remove the user
> > memory buffer alignment constraint so that it follows existing
> > conventions....
> 
> Relaxing the user buffer alignment requirement would mean that a single
> encryption data unit could be discontiguous in memory. I'm not sure that's
> allowed -- it *might* be, but we'd have to verify it on every vendor's inline
> encryption hardware, as well as handle this case in block/blk-crypto-fallback.c.
> It's much easier to just require proper alignment.

If the hardware can't handle logical block size aligned DMA
addresses for any operation it might be asked to perform, then
the hardware has not specified its blk_queue_logical_block_size()
correctly. This is not something the fscrypt layer should be trying
to enforce - that's a massive layering violation.

Seriously, if the hardware can't support discontiguous memory
addresses for critical operations, then it needs to tell the rest of
the IO stack about these limitations so that the higher layers
will either align things correctly or bounce buffer IOs that aren't
memory aligned properly.

> Also, would relaxing the user buffer alignment really address your concern,
> given that the file offset and length would still have to be fs-block aligned?

Isn't that exactly what I just suggested you do?

> Applications might also align the offset and length to logical_block_size only.

And so fscrypt_dio_supported() rejects them if they don't align
to the DUN requirements of fscrypt. That's the only -fscrypt
specific restriction- on direct IO.

> So I don't see how this is "easy to fix" at all, other than by limiting direct
> I/O support to data_unit_size == logical_block_size (which we could do for now
> if it gets you to stop nacking the DIO patches,

I'm saying now because I think these things need to be fixed before
the code gets merged, not because I think they are easy to fix.

I'm just asking you to layer your checks the way the rest of the
storage stack does them (i.e. user data IO constraints applied at
the fscrypt level, DMA address requirements propagate up from the
hardware) so that they behave as existing applications expect them
to.

i.e. if crypted data requires contiguous memory for the DMA, then
userspace needs to be told that via BLKSSZGET. Filesystem mkfs apps
need to be told this so they can set up their internal block sizes
and alignments correctly. Everything that issues IO to the storage
needs to know this.

> though I'm pretty sure that
> restriction won't work for some people so would need to be re-visited later...).

That's entirely my point. You need to fix the architectural flaws
right now so we don't have to deal with trying to fix them in future
when we are limited by constraints such as "thou shalt not break
userspace"....

Cheers,

Dave.

Patch
diff mbox series

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 9212325763b0..f72f22a718b2 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -69,6 +69,14 @@  void fscrypt_free_bounce_page(struct page *bounce_page)
 }
 EXPORT_SYMBOL(fscrypt_free_bounce_page);
 
+/*
+ * Generate the IV for the given logical block number within the given file.
+ * For filenames encryption, lblk_num == 0.
+ *
+ * Keep this in sync with fscrypt_limit_io_blocks().  fscrypt_limit_io_blocks()
+ * needs to know about any IV generation methods where the low bits of IV don't
+ * simply contain the lblk_num (e.g., IV_INO_LBLK_32).
+ */
 void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
 			 const struct fscrypt_info *ci)
 {
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index d7aecadf33c1..4cdf807b89b9 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -16,6 +16,7 @@ 
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/sched/mm.h>
+#include <linux/uio.h>
 
 #include "fscrypt_private.h"
 
@@ -362,3 +363,76 @@  bool fscrypt_mergeable_bio_bh(struct bio *bio,
 	return fscrypt_mergeable_bio(bio, inode, next_lblk);
 }
 EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
+
+/**
+ * fscrypt_dio_supported() - check whether a direct I/O request is supported
+ *			     given encryption constraints
+ * @iocb: the file and position the I/O is targeting
+ * @iter: the I/O data segment(s)
+ *
+ * Return: true if direct I/O is supported for this request, else false
+ */
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+	const struct inode *inode = file_inode(iocb->ki_filp);
+	const unsigned int blocksize = i_blocksize(inode);
+
+	/* If the file is unencrypted, no veto from us. */
+	if (!fscrypt_needs_contents_encryption(inode))
+		return true;
+
+	/* We only support direct I/O with inline crypto, not fs-layer crypto */
+	if (!fscrypt_inode_uses_inline_crypto(inode))
+		return false;
+
+	/*
+	 * Encryption granularity is the filesystem block, so the file position
+	 * and the iter must both be block aligned -- not just sector aligned.
+	 */
+	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(fscrypt_dio_supported);
+
+/**
+ * fscrypt_limit_io_blocks() - limit I/O blocks to avoid discontiguous DUNs
+ * @inode: the file on which I/O is being done
+ * @lblk: the block at which the I/O is being started from
+ * @nr_blocks: the number of blocks we want to submit starting at @lblk
+ *
+ * Determine the limit to the number of blocks that can be submitted in the bio
+ * targeting @lblk without causing a data unit number (DUN) discontinuity.
+ *
+ * This is normally just @nr_blocks, as normally the DUNs just increment along
+ * with the logical blocks.  (Or the file is not encrypted.)
+ *
+ * In rare cases, fscrypt can be using an IV generation method that allows the
+ * DUN to wrap around within logically contiguous blocks, and that wraparound
+ * will occur.  If this happens, a value less than @nr_blocks will be returned
+ * so that the wraparound doesn't occur in the middle of the bio.
+ *
+ * Return: the actual number of blocks that can be submitted
+ */
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks)
+{
+	const struct fscrypt_info *ci = inode->i_crypt_info;
+	u32 dun;
+
+	if (!fscrypt_inode_uses_inline_crypto(inode))
+		return nr_blocks;
+
+	if (nr_blocks <= 1)
+		return nr_blocks;
+
+	if (!(fscrypt_policy_flags(&ci->ci_policy) &
+	      FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))
+		return nr_blocks;
+
+	/* With IV_INO_LBLK_32, the DUN can wrap around from U32_MAX to 0. */
+
+	dun = ci->ci_hashed_ino + lblk;
+
+	return min_t(u64, nr_blocks, (u64)U32_MAX + 1 - dun);
+}
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index bb257411365f..5de122ec0464 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -559,6 +559,10 @@  bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 bool fscrypt_mergeable_bio_bh(struct bio *bio,
 			      const struct buffer_head *next_bh);
 
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter);
+
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks);
+
 #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
 
 static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
@@ -587,6 +591,20 @@  static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
 {
 	return true;
 }
+
+static inline bool fscrypt_dio_supported(struct kiocb *iocb,
+					 struct iov_iter *iter)
+{
+	const struct inode *inode = file_inode(iocb->ki_filp);
+	/* Inline crypto is compiled out: only unencrypted files may use DIO */
+	return !fscrypt_needs_contents_encryption(inode);
+}
+
+static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk,
+					  u64 nr_blocks)
+{
+	return nr_blocks; /* stub: inline crypto compiled out, never limits */
+}
 #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
 
 /**