diff mbox series

[2/2] iomap: move the iomap_dio_rw ->end_io callback into a structure

Message ID 20190903130327.6023-3-hch@lst.de (mailing list archive)
State Accepted
Headers show
Series [1/2] iomap: split size and error for iomap_dio_rw ->end_io | expand

Commit Message

Christoph Hellwig Sept. 3, 2019, 1:03 p.m. UTC
Add a new iomap_dio_ops structure that for now just contains the end_io
handler.  This avoids storing the function pointer in a mutable structure,
which is a possible exploit vector for kernel code execution, and prepares
for adding a submit_io handler that btrfs needs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/direct-io.c  | 21 ++++++++++-----------
 fs/xfs/xfs_file.c     |  6 +++++-
 include/linux/iomap.h | 10 +++++++---
 3 files changed, 22 insertions(+), 15 deletions(-)

Comments

Darrick J. Wong Sept. 3, 2019, 2:46 p.m. UTC | #1
On Tue, Sep 03, 2019 at 03:03:27PM +0200, Christoph Hellwig wrote:
> Add a new iomap_dio_ops structure that for now just contains the end_io
> handler.  This avoid storing the function pointer in a mutable structure,
> which is a possible exploit vector for kernel code execution, and prepares
> for adding a submit_io handler that btrfs needs.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/iomap/direct-io.c  | 21 ++++++++++-----------
>  fs/xfs/xfs_file.c     |  6 +++++-
>  include/linux/iomap.h | 10 +++++++---
>  3 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 2ccf1c6460d4..1fc28c2da279 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -24,7 +24,7 @@
>  
>  struct iomap_dio {
>  	struct kiocb		*iocb;
> -	iomap_dio_end_io_t	*end_io;
> +	const struct iomap_dio_ops *dops;
>  	loff_t			i_size;
>  	loff_t			size;
>  	atomic_t		ref;
> @@ -72,15 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
>  
>  static ssize_t iomap_dio_complete(struct iomap_dio *dio)
>  {
> +	const struct iomap_dio_ops *dops = dio->dops;
>  	struct kiocb *iocb = dio->iocb;
>  	struct inode *inode = file_inode(iocb->ki_filp);
>  	loff_t offset = iocb->ki_pos;
> -	ssize_t ret;
> +	ssize_t ret = dio->error;
>  
> -	if (dio->end_io)
> -		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags);
> -	else
> -		ret = dio->error;
> +	if (dops && dops->end_io)
> +		ret = dops->end_io(iocb, dio->size, ret, dio->flags);
>  
>  	if (likely(!ret)) {
>  		ret = dio->size;
> @@ -98,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
>  	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
>  	 * this invalidation fails, tough, the write still worked...
>  	 *
> -	 * And this page cache invalidation has to be after dio->end_io(), as
> -	 * some filesystems convert unwritten extents to real allocations in
> -	 * end_io() when necessary, otherwise a racing buffer read would cache
> +	 * And this page cache invalidation has to be after ->end_io(), as some
> +	 * filesystems convert unwritten extents to real allocations in
> +	 * ->end_io() when necessary, otherwise a racing buffer read would cache
>  	 * zeros from unwritten extents.
>  	 */
>  	if (!dio->error &&
> @@ -393,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
>   */
>  ssize_t
>  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> -		const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
> +		const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
>  {
>  	struct address_space *mapping = iocb->ki_filp->f_mapping;
>  	struct inode *inode = file_inode(iocb->ki_filp);
> @@ -418,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	atomic_set(&dio->ref, 1);
>  	dio->size = 0;
>  	dio->i_size = i_size_read(inode);
> -	dio->end_io = end_io;
> +	dio->dops = dops;
>  	dio->error = 0;
>  	dio->flags = 0;
>  
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 3d8e6db9ef77..1ffb179f35d2 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -443,6 +443,10 @@ xfs_dio_write_end_io(
>  	return error;
>  }
>  
> +static const struct iomap_dio_ops xfs_dio_write_ops = {
> +	.end_io		= xfs_dio_write_end_io,
> +};
> +
>  /*
>   * xfs_file_dio_aio_write - handle direct IO writes
>   *
> @@ -543,7 +547,7 @@ xfs_file_dio_aio_write(
>  	}
>  
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
> -	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
> +	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
>  
>  	/*
>  	 * If unaligned, this is the only IO in-flight. If it has not yet
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 50bb746d2216..7aa5d6117936 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
>   */
>  #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
>  #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
> -typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error,
> -				 unsigned int flags);
> +
> +struct iomap_dio_ops {
> +	int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
> +		      unsigned flags);
> +};
> +
>  ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> -		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
> +		const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
>  int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
>  
>  #ifdef CONFIG_SWAP
> -- 
> 2.20.1
>
Matthew Wilcox (Oracle) Sept. 3, 2019, 4:14 p.m. UTC | #2
On Tue, Sep 03, 2019 at 03:03:27PM +0200, Christoph Hellwig wrote:
> Add a new iomap_dio_ops structure that for now just contains the end_io
> handler.  This avoid storing the function pointer in a mutable structure,
> which is a possible exploit vector for kernel code execution, and prepares
> for adding a submit_io handler that btrfs needs.

Is it really a security win?  If I can overwrite dio->end_io, I can as
well overwrite dio->dops.

The patch itself looks sane, but I'm not sure about this particular reason.
Christoph Hellwig Sept. 4, 2019, 12:51 p.m. UTC | #3
On Tue, Sep 03, 2019 at 09:14:46AM -0700, Matthew Wilcox wrote:
> On Tue, Sep 03, 2019 at 03:03:27PM +0200, Christoph Hellwig wrote:
> > Add a new iomap_dio_ops structure that for now just contains the end_io
> > handler.  This avoid storing the function pointer in a mutable structure,
> > which is a possible exploit vector for kernel code execution, and prepares
> > for adding a submit_io handler that btrfs needs.
> 
> Is it really a security win?  If I can overwrite dio->end_io, I can as
> well overwrite dio->dops.

Which you'd then need to point to another place where you can stuff
a function pointer.  Not impossible, but just another hoop to jump
through.  At least until we add run-time checks that ops structures
are in read-only memory, which sounds more sensible than some of the
other security hardening patches floating around.
diff mbox series

Patch

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 2ccf1c6460d4..1fc28c2da279 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -24,7 +24,7 @@ 
 
 struct iomap_dio {
 	struct kiocb		*iocb;
-	iomap_dio_end_io_t	*end_io;
+	const struct iomap_dio_ops *dops;
 	loff_t			i_size;
 	loff_t			size;
 	atomic_t		ref;
@@ -72,15 +72,14 @@  static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
+	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
 	struct inode *inode = file_inode(iocb->ki_filp);
 	loff_t offset = iocb->ki_pos;
-	ssize_t ret;
+	ssize_t ret = dio->error;
 
-	if (dio->end_io)
-		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags);
-	else
-		ret = dio->error;
+	if (dops && dops->end_io)
+		ret = dops->end_io(iocb, dio->size, ret, dio->flags);
 
 	if (likely(!ret)) {
 		ret = dio->size;
@@ -98,9 +97,9 @@  static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
 	 * this invalidation fails, tough, the write still worked...
 	 *
-	 * And this page cache invalidation has to be after dio->end_io(), as
-	 * some filesystems convert unwritten extents to real allocations in
-	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * And this page cache invalidation has to be after ->end_io(), as some
+	 * filesystems convert unwritten extents to real allocations in
+	 * ->end_io() when necessary, otherwise a racing buffer read would cache
 	 * zeros from unwritten extents.
 	 */
 	if (!dio->error &&
@@ -393,7 +392,7 @@  iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -418,7 +417,7 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	atomic_set(&dio->ref, 1);
 	dio->size = 0;
 	dio->i_size = i_size_read(inode);
-	dio->end_io = end_io;
+	dio->dops = dops;
 	dio->error = 0;
 	dio->flags = 0;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3d8e6db9ef77..1ffb179f35d2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -443,6 +443,10 @@  xfs_dio_write_end_io(
 	return error;
 }
 
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+	.end_io		= xfs_dio_write_end_io,
+};
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -543,7 +547,7 @@  xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
 
 	/*
 	 * If unaligned, this is the only IO in-flight. If it has not yet
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 50bb746d2216..7aa5d6117936 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -188,10 +188,14 @@  sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
  */
 #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
 #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error,
-				 unsigned int flags);
+
+struct iomap_dio_ops {
+	int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+		      unsigned flags);
+};
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP