diff mbox series

[v14,08/17] splice: Do splice read from a file without using ITER_PIPE

Message ID 20230214171330.2722188-9-dhowells@redhat.com (mailing list archive)
State New
Headers show
Series iov_iter: Improve page extraction (pin or just list) | expand

Commit Message

David Howells Feb. 14, 2023, 5:13 p.m. UTC
Make generic_file_splice_read() use filemap_splice_read() and
direct_splice_read() rather than using an ITER_PIPE and call_read_iter().

With this, ITER_PIPE is no longer used.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: David Hildenbrand <david@redhat.com>
cc: John Hubbard <jhubbard@nvidia.com>
cc: linux-mm@kvack.org
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
---

Notes:
    ver #14)
    - Split out filemap_splice_read() into a separate patch.

 fs/splice.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

Comments

Mezgani Ali Feb. 14, 2023, 5:24 p.m. UTC | #1
Hello Kernel,

Whom made this bad quality of code :

> -	struct iov_iter to;
> -	struct kiocb kiocb;
> -	int ret;
> -
> -	iov_iter_pipe(&to, ITER_DEST, pipe, len);
> -	init_sync_kiocb(&kiocb, in);
> -	kiocb.ki_pos = *ppos;
> -	ret = call_read_iter(in, &kiocb, &to);
> -	if (ret > 0) {
> -		*ppos = kiocb.ki_pos;
> -		file_accessed(in);
> -	} else if (ret < 0) {
> -		/* free what was emitted */
> -		pipe_discard_from(pipe, to.start_head);
> -		/*
> -		 * callers of ->splice_read() expect -EAGAIN on
> -		 * "can't put anything in there", rather than -EFAULT.
> -		 */
> -		if (ret == -EFAULT)
> -			ret = -EAGAIN;
> -	}
> -
> -	return ret;

I pretend that keeping the file clear, clean and explicit far from new comers is much more needed.

Please a cache flush is wonderful here.


Kind regards,
--
Mezgani Ali
SVP of Engineering
https://www.nativelabs.ma/
ali.mezgani@nativelabs.ma
+212 6 44 17 94 51

 






> On 14/02/2023, at 18:13, David Howells <dhowells@redhat.com> wrote:
> 
> Make generic_file_splice_read() use filemap_splice_read() and
> direct_splice_read() rather than using an ITER_PIPE and call_read_iter().
> 
> With this, ITER_PIPE is no longer used.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Christoph Hellwig <hch@lst.de>
> cc: Al Viro <viro@zeniv.linux.org.uk>
> cc: David Hildenbrand <david@redhat.com>
> cc: John Hubbard <jhubbard@nvidia.com>
> cc: linux-mm@kvack.org
> cc: linux-block@vger.kernel.org
> cc: linux-fsdevel@vger.kernel.org
> ---
> 
> Notes:
>    ver #14)
>    - Split out filemap_splice_read() into a separate patch.
> 
> fs/splice.c | 30 +++++++-----------------------
> 1 file changed, 7 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/splice.c b/fs/splice.c
> index 4c6332854b63..a93478338cec 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -391,29 +391,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
> 				 struct pipe_inode_info *pipe, size_t len,
> 				 unsigned int flags)
> {
> -	struct iov_iter to;
> -	struct kiocb kiocb;
> -	int ret;
> -
> -	iov_iter_pipe(&to, ITER_DEST, pipe, len);
> -	init_sync_kiocb(&kiocb, in);
> -	kiocb.ki_pos = *ppos;
> -	ret = call_read_iter(in, &kiocb, &to);
> -	if (ret > 0) {
> -		*ppos = kiocb.ki_pos;
> -		file_accessed(in);
> -	} else if (ret < 0) {
> -		/* free what was emitted */
> -		pipe_discard_from(pipe, to.start_head);
> -		/*
> -		 * callers of ->splice_read() expect -EAGAIN on
> -		 * "can't put anything in there", rather than -EFAULT.
> -		 */
> -		if (ret == -EFAULT)
> -			ret = -EAGAIN;
> -	}
> -
> -	return ret;
> +	if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
> +		return 0;
> +	if (unlikely(!len))
> +		return 0;
> +	if (in->f_flags & O_DIRECT)
> +		return direct_splice_read(in, ppos, pipe, len, flags);
> +	return filemap_splice_read(in, ppos, pipe, len, flags);
> }
> EXPORT_SYMBOL(generic_file_splice_read);
> 
>
Ming Lei Feb. 17, 2023, 8:22 a.m. UTC | #2
On Tue, Feb 14, 2023 at 05:13:21PM +0000, David Howells wrote:
> Make generic_file_splice_read() use filemap_splice_read() and
> direct_splice_read() rather than using an ITER_PIPE and call_read_iter().
> 
> With this, ITER_PIPE is no longer used.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Christoph Hellwig <hch@lst.de>
> cc: Al Viro <viro@zeniv.linux.org.uk>
> cc: David Hildenbrand <david@redhat.com>
> cc: John Hubbard <jhubbard@nvidia.com>
> cc: linux-mm@kvack.org
> cc: linux-block@vger.kernel.org
> cc: linux-fsdevel@vger.kernel.org
> ---
> 
> Notes:
>     ver #14)
>     - Split out filemap_splice_read() into a separate patch.
> 
>  fs/splice.c | 30 +++++++-----------------------
>  1 file changed, 7 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/splice.c b/fs/splice.c
> index 4c6332854b63..a93478338cec 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -391,29 +391,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
>  				 struct pipe_inode_info *pipe, size_t len,
>  				 unsigned int flags)
>  {
> -	struct iov_iter to;
> -	struct kiocb kiocb;
> -	int ret;
> -
> -	iov_iter_pipe(&to, ITER_DEST, pipe, len);
> -	init_sync_kiocb(&kiocb, in);
> -	kiocb.ki_pos = *ppos;
> -	ret = call_read_iter(in, &kiocb, &to);
> -	if (ret > 0) {
> -		*ppos = kiocb.ki_pos;
> -		file_accessed(in);
> -	} else if (ret < 0) {
> -		/* free what was emitted */
> -		pipe_discard_from(pipe, to.start_head);
> -		/*
> -		 * callers of ->splice_read() expect -EAGAIN on
> -		 * "can't put anything in there", rather than -EFAULT.
> -		 */
> -		if (ret == -EFAULT)
> -			ret = -EAGAIN;
> -	}
> -
> -	return ret;
> +	if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
> +		return 0;
> +	if (unlikely(!len))
> +		return 0;
> +	if (in->f_flags & O_DIRECT)
> +		return direct_splice_read(in, ppos, pipe, len, flags);

Hello David,

I have one question, for dio, pages need to map to userspace
memory, but direct_splice_read() just allocates pages and not
see when the user mapping is setup, can you give one hint?


Thanks, 
Ming
Ming Lei Feb. 17, 2023, 9:18 a.m. UTC | #3
On Fri, Feb 17, 2023 at 4:22 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> On Tue, Feb 14, 2023 at 05:13:21PM +0000, David Howells wrote:
> > Make generic_file_splice_read() use filemap_splice_read() and
> > direct_splice_read() rather than using an ITER_PIPE and call_read_iter().
> >
> > With this, ITER_PIPE is no longer used.
> >
> > Signed-off-by: David Howells <dhowells@redhat.com>
> > cc: Jens Axboe <axboe@kernel.dk>
> > cc: Christoph Hellwig <hch@lst.de>
> > cc: Al Viro <viro@zeniv.linux.org.uk>
> > cc: David Hildenbrand <david@redhat.com>
> > cc: John Hubbard <jhubbard@nvidia.com>
> > cc: linux-mm@kvack.org
> > cc: linux-block@vger.kernel.org
> > cc: linux-fsdevel@vger.kernel.org
> > ---
> >
> > Notes:
> >     ver #14)
> >     - Split out filemap_splice_read() into a separate patch.
> >
> >  fs/splice.c | 30 +++++++-----------------------
> >  1 file changed, 7 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/splice.c b/fs/splice.c
> > index 4c6332854b63..a93478338cec 100644
> > --- a/fs/splice.c
> > +++ b/fs/splice.c
> > @@ -391,29 +391,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
> >                                struct pipe_inode_info *pipe, size_t len,
> >                                unsigned int flags)
> >  {
> > -     struct iov_iter to;
> > -     struct kiocb kiocb;
> > -     int ret;
> > -
> > -     iov_iter_pipe(&to, ITER_DEST, pipe, len);
> > -     init_sync_kiocb(&kiocb, in);
> > -     kiocb.ki_pos = *ppos;
> > -     ret = call_read_iter(in, &kiocb, &to);
> > -     if (ret > 0) {
> > -             *ppos = kiocb.ki_pos;
> > -             file_accessed(in);
> > -     } else if (ret < 0) {
> > -             /* free what was emitted */
> > -             pipe_discard_from(pipe, to.start_head);
> > -             /*
> > -              * callers of ->splice_read() expect -EAGAIN on
> > -              * "can't put anything in there", rather than -EFAULT.
> > -              */
> > -             if (ret == -EFAULT)
> > -                     ret = -EAGAIN;
> > -     }
> > -
> > -     return ret;
> > +     if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
> > +             return 0;
> > +     if (unlikely(!len))
> > +             return 0;
> > +     if (in->f_flags & O_DIRECT)
> > +             return direct_splice_read(in, ppos, pipe, len, flags);
>
> Hello David,
>
> I have one question, for dio, pages need to map to userspace
> memory, but direct_splice_read() just allocates pages and not
> see when the user mapping is setup, can you give one hint?

oops, it is ->splice_read,  not ->read_iter, and pipe buffer isn't
user-backed, sorry for the noise.

Thanks,
Ming
Alexander Egorenkov Feb. 17, 2023, 8:39 p.m. UTC | #4
Hello,

this commit breaks our s390x tests on linux-next which use Python 3
among other things.

We are using the Python 3 tox module and for some reason,
the above commit makes Python create files with padded zeroes.

--- a/tox/distro/lib/python3.11/site-packages/_distutils_hack/__init__.py
+++ b/tox/distro/lib/python3.11/site-packages/_distutils_hack/__init__.py
@@ -381,133 +381,4 @@
 000017c0  49 4c 53 5f 46 49 4e 44  45 52 29 0a 20 20 20 20  |ILS_FINDER).    |
 000017d0  65 78 63 65 70 74 20 56  61 6c 75 65 45 72 72 6f  |except ValueErro|
 000017e0  72 3a 0a 20 20 20 20 20  20 20 20 70 61 73 73 0a  |r:.        pass.|
-000017f0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001800  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001810  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001820  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001830  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001840  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001850  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
-00001860  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
... 

How to reproduce on Fedora
--------------------------

# dnf install -y python3-tox

# cat >tox.ini <<EOF
[tox]
envlist = dev,distro,doc,lint
skipsdist = True

[testenv:distro]
sitepackages = true
EOF

# python3 -m tox -v --notest -e distro

Error processing line 1 of /mnt/test/.tox/distro/lib/python3.11/site-packages/distutils-precedence.pth:

  Traceback (most recent call last):
    File "<frozen site>", line 186, in addpackage
    File "<string>", line 1, in <module>
  ValueError: source code string cannot contain null bytes

Remainder of file ignored

# The above error message disappears if one reverts the bad commit.

Regards
Alex
Alexander Egorenkov Feb. 17, 2023, 8:59 p.m. UTC | #5
Forgot to mention another related problem we had recently.

We have seen LTP test suite failing in 'sendfile09' test case on 2023-02-15:

 [ 5157.233192] CPU: 0 PID: 2435571 Comm: sendfile09 Tainted: G           OE K  N 6.2.0-20230214.rc8.git112.3ebb0ac55efa.300.fc37.s390x+next #1
 [ 5157.233197] Hardware name: IBM 8561 T01 701 (z/VM 7.3.0)
 [ 5157.233199] Krnl PSW : 0704d00180000000 0000000000000002 (0x2)
 [ 5157.233207]            R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3
 [ 5157.233210] Krnl GPRS: 0000000000000008 0000000000000000 00000000893d1800 00000372088bd040
 [ 5157.233213]            00000372088bd040 000000018c836000 0000000000000000 0000038000a4bb80
 [ 5157.233215]            00000372088bd040 0000000000000000 00000000893d1800 00000372088bd040
 [ 5157.233218]            0000000081182100 00000000893d1800 000000001f691596 0000038000a4b9b0
 [ 5157.233222] Krnl Code:#0000000000000000: 0000       illegal 
              >0000000000000002: 0000        illegal 
               0000000000000004: 0000       illegal 
               0000000000000006: 0000       illegal 
               0000000000000008: 0000       illegal 
               000000000000000a: 0000       illegal 
               000000000000000c: 0000       illegal 
               000000000000000e: 0000       illegal 
 [ 5157.233281] Call Trace:
 [ 5157.233284]  [<0000000000000002>] 0x2 
 [ 5157.233288]  [<000000001f694e26>] filemap_get_pages+0x276/0x3b0 
 [ 5157.233296]  [<000000001f7c7256>] generic_file_buffered_splice_read.constprop.0+0xd6/0x370 
 [ 5157.233301]  [<000000001f7c6aa0>] do_splice_to+0x98/0xc8 
 [ 5157.233304]  [<000000001f7c6eee>] splice_direct_to_actor+0xb6/0x270 
 [ 5157.233306]  [<000000001f7c714a>] do_splice_direct+0xa2/0xd8 
 [ 5157.233309]  [<000000001f77c79c>] do_sendfile+0x39c/0x4e8 
 [ 5157.233314]  [<000000001f77ca00>] __do_sys_sendfile64+0x68/0xc0 
 [ 5157.233317]  [<00000000200b4bb4>] __do_syscall+0x1d4/0x200 
 [ 5157.233322]  [<00000000200c42f2>] system_call+0x82/0xb0 
 [ 5157.233328] Last Breaking-Event-Address:
 [ 5157.233329]  [<000000001f691594>] filemap_read_folio+0x3c/0xd0
 [ 5157.233338] Kernel panic - not syncing: Fatal exception: panic_on_oops

But it was apparently fixed on the next day.

Regards
Alex
David Howells Feb. 17, 2023, 9:16 p.m. UTC | #6
Alexander Egorenkov <egorenar@linux.ibm.com> wrote:

> Subject: Re: [PATCH v14 08/17] splice: Do splice read from a file without
>  using ITER_PIPE

Well, I can reproduce it.  Putting a printks in generic_file_splice_read(),
direct_splice_read() and filemap_splice_read(), however, seems to show that it
isn't using any of those functions; nor can I see any sign of a splice syscall
in a strace:-/

David
David Howells Feb. 17, 2023, 9:24 p.m. UTC | #7
egorenar@linux.ibm.com wrote:

>  [ 5157.233284]  [<0000000000000002>] 0x2 
>  [ 5157.233288]  [<000000001f694e26>] filemap_get_pages+0x276/0x3b0 

Yeah.  I think this was fixed by the provision of a shmem-specific splice read
(patch 04/17 in this series).

David
David Howells Feb. 17, 2023, 9:47 p.m. UTC | #8
Does the attached fix the problem for you?  The data being written into the
pipe needs to be limited to the size of the file.

David

diff --git a/mm/filemap.c b/mm/filemap.c
index c01bbcb9fa92..6362ac697a70 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2948,7 +2948,8 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
 			if (writably_mapped)
 				flush_dcache_folio(folio);
 
-			n = splice_folio_into_pipe(pipe, folio, *ppos, len);
+			n = min_t(loff_t, len, isize - *ppos);
+			n = splice_folio_into_pipe(pipe, folio, *ppos, n);
 			if (!n)
 				goto out;
 			len -= n;
Alexander Egorenkov Feb. 18, 2023, 8:29 a.m. UTC | #9
Hi David,


David Howells <dhowells@redhat.com> writes:

> Does the attached fix the problem for you?  The data being written into the
> pipe needs to be limited to the size of the file.
>
> David
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c01bbcb9fa92..6362ac697a70 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2948,7 +2948,8 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
>  			if (writably_mapped)
>  				flush_dcache_folio(folio);
>  
> -			n = splice_folio_into_pipe(pipe, folio, *ppos, len);
> +			n = min_t(loff_t, len, isize - *ppos);
> +			n = splice_folio_into_pipe(pipe, folio, *ppos, n);
>  			if (!n)
>  				goto out;
>  			len -= n;

Yes, this change fixed the problem.

Thanks
Regards
Alex
David Howells Feb. 18, 2023, 9:18 a.m. UTC | #10
Alexander Egorenkov <egorenar@linux.ibm.com> wrote:

> > +			n = min_t(loff_t, len, isize - *ppos);
> > +			n = splice_folio_into_pipe(pipe, folio, *ppos, n);
> >  			if (!n)
> >  				goto out;
> >  			len -= n;
> 
> Yes, this change fixed the problem.

Thanks!

David
diff mbox series

Patch

diff --git a/fs/splice.c b/fs/splice.c
index 4c6332854b63..a93478338cec 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -391,29 +391,13 @@  ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 				 struct pipe_inode_info *pipe, size_t len,
 				 unsigned int flags)
 {
-	struct iov_iter to;
-	struct kiocb kiocb;
-	int ret;
-
-	iov_iter_pipe(&to, ITER_DEST, pipe, len);
-	init_sync_kiocb(&kiocb, in);
-	kiocb.ki_pos = *ppos;
-	ret = call_read_iter(in, &kiocb, &to);
-	if (ret > 0) {
-		*ppos = kiocb.ki_pos;
-		file_accessed(in);
-	} else if (ret < 0) {
-		/* free what was emitted */
-		pipe_discard_from(pipe, to.start_head);
-		/*
-		 * callers of ->splice_read() expect -EAGAIN on
-		 * "can't put anything in there", rather than -EFAULT.
-		 */
-		if (ret == -EFAULT)
-			ret = -EAGAIN;
-	}
-
-	return ret;
+	if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
+		return 0;
+	if (unlikely(!len))
+		return 0;
+	if (in->f_flags & O_DIRECT)
+		return direct_splice_read(in, ppos, pipe, len, flags);
+	return filemap_splice_read(in, ppos, pipe, len, flags);
 }
 EXPORT_SYMBOL(generic_file_splice_read);