diff mbox series

[v4,3/4] iov_iter: add copy_page_to_iter_atomic()

Message ID 31482908634cbb68adafedb65f0b21888c194a1b.1679431886.git.lstoakes@gmail.com (mailing list archive)
State New
Headers show
Series convert read_kcore(), vread() to use iterators | expand

Commit Message

Lorenzo Stoakes March 21, 2023, 8:54 p.m. UTC
Provide an atomic context equivalent for copy_page_to_iter(). This eschews
the might_fault() check and copies memory in the same way that
copy_page_from_iter_atomic() does.

This function assumes a non-compound page; however, this mimics the
existing behaviour of copy_page_from_iter_atomic(). I am keeping the
behaviour consistent between the two, deferring any such change to an
explicit folio-fication effort.

This is being added in order that an iterable form of vread() can be
implemented with known prefaulted pages to avoid the need for mutex
locking.

Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
---
 include/linux/uio.h |  2 ++
 lib/iov_iter.c      | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

Comments

Baoquan He March 22, 2023, 10:17 a.m. UTC | #1
On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
> Provide an atomic context equivalent for copy_page_to_iter(). This eschews
> the might_fault() check copies memory in the same way that
> copy_page_from_iter_atomic() does.
> 
> This functions assumes a non-compound page, however this mimics the
> existing behaviour of copy_page_from_iter_atomic(). I am keeping the
> behaviour consistent between the two, deferring any such change to an
> explicit folio-fication effort.
> 
> This is being added in order that an iteratable form of vread() can be
> implemented with known prefaulted pages to avoid the need for mutex
> locking.
> 
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> ---
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 28 ++++++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
> 
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 27e3fd942960..fab07103090f 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -154,6 +154,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
>  
>  size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
>  				  size_t bytes, struct iov_iter *i);
> +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset,
> +				size_t bytes, struct iov_iter *i);
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  void iov_iter_revert(struct iov_iter *i, size_t bytes);
>  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 274014e4eafe..48ca1c5dfc04 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -821,6 +821,34 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
>  }
>  EXPORT_SYMBOL(copy_page_from_iter_atomic);
>  
> +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset, size_t bytes,
> +				struct iov_iter *i)
> +{
> +	char *kaddr = kmap_local_page(page);

I am a little confused about the name of this new function. In its
counterpart, copy_page_from_iter_atomic(), kmap_atomic()/kunmap_atomic()
are used. With them, if CONFIG_HIGHMEM=n, it's like below:

static inline void *kmap_atomic(struct page *page)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
                migrate_disable();
        else
                preempt_disable();
        pagefault_disable();
        return page_address(page);
}

But kmap_local_page() only calls page_address(), so whether the code block
between kmap_local_page() and kunmap_local() is also atomic is a little
unclear to me.

static inline void *kmap_local_page(struct page *page)
{
        return page_address(page);
}

> +	char *p = kaddr + offset;
> +	size_t copied = 0;
> +
> +	if (!page_copy_sane(page, offset, bytes) ||
> +	    WARN_ON_ONCE(i->data_source))
> +		goto out;
> +
> +	if (unlikely(iov_iter_is_pipe(i))) {
> +		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
> +		goto out;
> +	}
> +
> +	iterate_and_advance(i, bytes, base, len, off,
> +		copyout(base, p + off, len),
> +		memcpy(base, p + off, len)
> +	)
> +	copied = bytes;
> +
> +out:
> +	kunmap_local(kaddr);
> +	return copied;
> +}
> +EXPORT_SYMBOL(copy_page_to_iter_atomic);
> +
>  static void pipe_advance(struct iov_iter *i, size_t size)
>  {
>  	struct pipe_inode_info *pipe = i->pipe;
> -- 
> 2.39.2
>
Lorenzo Stoakes March 22, 2023, 10:32 a.m. UTC | #2
On Wed, Mar 22, 2023 at 06:17:25PM +0800, Baoquan He wrote:
> On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
> > Provide an atomic context equivalent for copy_page_to_iter(). This eschews
> > the might_fault() check copies memory in the same way that
> > copy_page_from_iter_atomic() does.
> >
> > This functions assumes a non-compound page, however this mimics the
> > existing behaviour of copy_page_from_iter_atomic(). I am keeping the
> > behaviour consistent between the two, deferring any such change to an
> > explicit folio-fication effort.
> >
> > This is being added in order that an iteratable form of vread() can be
> > implemented with known prefaulted pages to avoid the need for mutex
> > locking.
> >
> > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > ---
> >  include/linux/uio.h |  2 ++
> >  lib/iov_iter.c      | 28 ++++++++++++++++++++++++++++
> >  2 files changed, 30 insertions(+)
> >
> > diff --git a/include/linux/uio.h b/include/linux/uio.h
> > index 27e3fd942960..fab07103090f 100644
> > --- a/include/linux/uio.h
> > +++ b/include/linux/uio.h
> > @@ -154,6 +154,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
> >
> >  size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
> >  				  size_t bytes, struct iov_iter *i);
> > +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset,
> > +				size_t bytes, struct iov_iter *i);
> >  void iov_iter_advance(struct iov_iter *i, size_t bytes);
> >  void iov_iter_revert(struct iov_iter *i, size_t bytes);
> >  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
> > diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> > index 274014e4eafe..48ca1c5dfc04 100644
> > --- a/lib/iov_iter.c
> > +++ b/lib/iov_iter.c
> > @@ -821,6 +821,34 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
> >  }
> >  EXPORT_SYMBOL(copy_page_from_iter_atomic);
> >
> > +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset, size_t bytes,
> > +				struct iov_iter *i)
> > +{
> > +	char *kaddr = kmap_local_page(page);
>
> I am a little confused about the name of this new function. In its
> conterpart, copy_page_from_iter_atomic(), kmap_atomic()/kunmpa_atomic()
> are used. With them, if CONFIG_HIGHMEM=n, it's like below:

The reason for this is that:-

1. kmap_atomic() explicitly states that it is now deprecated and must no longer
   be used, and kmap_local_page() should be used instead:-

 * kmap_atomic - Atomically map a page for temporary usage - Deprecated!

 * Do not use in new code. Use kmap_local_page() instead.

2. kmap_local_page() explicitly states that it can be used in any context:-

 * Can be invoked from any context, including interrupts.

I wanted to follow this advice as strictly as I could, hence the change. However,
we do need preemption/pagefaults explicitly disabled in this context (we are
happy to fail if the faulted-in pages are unmapped in the meantime), and I didn't
check the internals to make sure.

So I think for safety it is better to use k[un]map_atomic() here, I'll respin
and put that back in, good catch!

>
> static inline void *kmap_atomic(struct page *page)
> {
>         if (IS_ENABLED(CONFIG_PREEMPT_RT))
>                 migrate_disable();
>         else
>                 preempt_disable();
>         pagefault_disable();
>         return page_address(page);
> }
>
> But kmap_local_page() is only having page_address(), the code block
> between kmap_local_page() and kunmap_local() is also atomic, it's a
> little messy in my mind.
>
> static inline void *kmap_local_page(struct page *page)
> {
>         return page_address(page);
> }
>
> > +	char *p = kaddr + offset;
> > +	size_t copied = 0;
> > +
> > +	if (!page_copy_sane(page, offset, bytes) ||
> > +	    WARN_ON_ONCE(i->data_source))
> > +		goto out;
> > +
> > +	if (unlikely(iov_iter_is_pipe(i))) {
> > +		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
> > +		goto out;
> > +	}
> > +
> > +	iterate_and_advance(i, bytes, base, len, off,
> > +		copyout(base, p + off, len),
> > +		memcpy(base, p + off, len)
> > +	)
> > +	copied = bytes;
> > +
> > +out:
> > +	kunmap_local(kaddr);
> > +	return copied;
> > +}
> > +EXPORT_SYMBOL(copy_page_to_iter_atomic);
> > +
> >  static void pipe_advance(struct iov_iter *i, size_t size)
> >  {
> >  	struct pipe_inode_info *pipe = i->pipe;
> > --
> > 2.39.2
> >
>
Lorenzo Stoakes March 22, 2023, 11:06 a.m. UTC | #3
On Wed, Mar 22, 2023 at 10:32:47AM +0000, Lorenzo Stoakes wrote:
> > I am a little confused about the name of this new function. In its
> > conterpart, copy_page_from_iter_atomic(), kmap_atomic()/kunmpa_atomic()
> > are used. With them, if CONFIG_HIGHMEM=n, it's like below:
>
> The reason for this is that:-
>
> 1. kmap_atomic() explicitly states that it is now deprecated and must no longer
>    be used, and kmap_local_page() should be used instead:-
>
>  * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
>
>  * Do not use in new code. Use kmap_local_page() instead.
>
> 2. kmap_local_page() explicitly states that it can be used in any context:-
>
>  * Can be invoked from any context, including interrupts.
>
> I wanted follow this advice as strictly as I could, hence the change. However,
> we do need preemption/pagefaults explicitly disabled in this context (we are
> happy to fail if the faulted in pages are unmapped in meantime), and I didn't
> check the internals to make sure.
>
> So I think for safety it is better to use k[un]map_atomic() here, I'll respin
> and put that back in, good catch!
>

Actually, given we have preemption disabled due to the held spinlock, I think
it'd be better to add a copy_page_to_iter_nofault() that uses
copy_to_user_nofault(), which will disable pagefaults and thus have exactly
equivalent behaviour, more explicitly and without the use of a deprecated
function.

Thanks for raising this!!

> >
> > static inline void *kmap_atomic(struct page *page)
> > {
> >         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> >                 migrate_disable();
> >         else
> >                 preempt_disable();
> >         pagefault_disable();
> >         return page_address(page);
> > }
> >
> > But kmap_local_page() is only having page_address(), the code block
> > between kmap_local_page() and kunmap_local() is also atomic, it's a
> > little messy in my mind.
> >
> > static inline void *kmap_local_page(struct page *page)
> > {
> >         return page_address(page);
> > }
> >
> > > +	char *p = kaddr + offset;
> > > +	size_t copied = 0;
> > > +
> > > +	if (!page_copy_sane(page, offset, bytes) ||
> > > +	    WARN_ON_ONCE(i->data_source))
> > > +		goto out;
> > > +
> > > +	if (unlikely(iov_iter_is_pipe(i))) {
> > > +		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
> > > +		goto out;
> > > +	}
> > > +
> > > +	iterate_and_advance(i, bytes, base, len, off,
> > > +		copyout(base, p + off, len),
> > > +		memcpy(base, p + off, len)
> > > +	)
> > > +	copied = bytes;
> > > +
> > > +out:
> > > +	kunmap_local(kaddr);
> > > +	return copied;
> > > +}
> > > +EXPORT_SYMBOL(copy_page_to_iter_atomic);
> > > +
> > >  static void pipe_advance(struct iov_iter *i, size_t size)
> > >  {
> > >  	struct pipe_inode_info *pipe = i->pipe;
> > > --
> > > 2.39.2
> > >
> >
Baoquan He March 22, 2023, 1:08 p.m. UTC | #4
On 03/22/23 at 10:32am, Lorenzo Stoakes wrote:
> On Wed, Mar 22, 2023 at 06:17:25PM +0800, Baoquan He wrote:
> > On 03/21/23 at 08:54pm, Lorenzo Stoakes wrote:
> > > Provide an atomic context equivalent for copy_page_to_iter(). This eschews
> > > the might_fault() check copies memory in the same way that
> > > copy_page_from_iter_atomic() does.
> > >
> > > This functions assumes a non-compound page, however this mimics the
> > > existing behaviour of copy_page_from_iter_atomic(). I am keeping the
> > > behaviour consistent between the two, deferring any such change to an
> > > explicit folio-fication effort.
> > >
> > > This is being added in order that an iteratable form of vread() can be
> > > implemented with known prefaulted pages to avoid the need for mutex
> > > locking.
> > >
> > > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
> > > ---
> > >  include/linux/uio.h |  2 ++
> > >  lib/iov_iter.c      | 28 ++++++++++++++++++++++++++++
> > >  2 files changed, 30 insertions(+)
> > >
> > > diff --git a/include/linux/uio.h b/include/linux/uio.h
> > > index 27e3fd942960..fab07103090f 100644
> > > --- a/include/linux/uio.h
> > > +++ b/include/linux/uio.h
> > > @@ -154,6 +154,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
> > >
> > >  size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
> > >  				  size_t bytes, struct iov_iter *i);
> > > +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset,
> > > +				size_t bytes, struct iov_iter *i);
> > >  void iov_iter_advance(struct iov_iter *i, size_t bytes);
> > >  void iov_iter_revert(struct iov_iter *i, size_t bytes);
> > >  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
> > > diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> > > index 274014e4eafe..48ca1c5dfc04 100644
> > > --- a/lib/iov_iter.c
> > > +++ b/lib/iov_iter.c
> > > @@ -821,6 +821,34 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
> > >  }
> > >  EXPORT_SYMBOL(copy_page_from_iter_atomic);
> > >
> > > +size_t copy_page_to_iter_atomic(struct page *page, unsigned offset, size_t bytes,
> > > +				struct iov_iter *i)
> > > +{
> > > +	char *kaddr = kmap_local_page(page);
> >
> > I am a little confused about the name of this new function. In its
> > conterpart, copy_page_from_iter_atomic(), kmap_atomic()/kunmpa_atomic()
> > are used. With them, if CONFIG_HIGHMEM=n, it's like below:
> 
> The reason for this is that:-
> 
> 1. kmap_atomic() explicitly states that it is now deprecated and must no longer
>    be used, and kmap_local_page() should be used instead:-
> 
>  * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
> 
>  * Do not use in new code. Use kmap_local_page() instead.
> 
> 2. kmap_local_page() explicitly states that it can be used in any context:-
> 
>  * Can be invoked from any context, including interrupts.

Yeah, I saw that stated in the documentation too. As I understand it, it is
the page mapping itself that is guaranteed and can be used in any context
once kmap_local_page() is taken. However, here kmap_local_page() is being
used to make the code block atomic, which it may not achieve.

> 
> I wanted follow this advice as strictly as I could, hence the change. However,
> we do need preemption/pagefaults explicitly disabled in this context (we are
> happy to fail if the faulted in pages are unmapped in meantime), and I didn't
> check the internals to make sure.
> 
> So I think for safety it is better to use k[un]map_atomic() here, I'll respin
> and put that back in, good catch!
> 
> >
> > static inline void *kmap_atomic(struct page *page)
> > {
> >         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> >                 migrate_disable();
> >         else
> >                 preempt_disable();
> >         pagefault_disable();
> >         return page_address(page);
> > }
> >
> > But kmap_local_page() is only having page_address(), the code block
> > between kmap_local_page() and kunmap_local() is also atomic, it's a
> > little messy in my mind.
> >
> > static inline void *kmap_local_page(struct page *page)
> > {
> >         return page_address(page);
> > }
> >
> > > +	char *p = kaddr + offset;
> > > +	size_t copied = 0;
> > > +
> > > +	if (!page_copy_sane(page, offset, bytes) ||
> > > +	    WARN_ON_ONCE(i->data_source))
> > > +		goto out;
> > > +
> > > +	if (unlikely(iov_iter_is_pipe(i))) {
> > > +		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
> > > +		goto out;
> > > +	}
> > > +
> > > +	iterate_and_advance(i, bytes, base, len, off,
> > > +		copyout(base, p + off, len),
> > > +		memcpy(base, p + off, len)
> > > +	)
> > > +	copied = bytes;
> > > +
> > > +out:
> > > +	kunmap_local(kaddr);
> > > +	return copied;
> > > +}
> > > +EXPORT_SYMBOL(copy_page_to_iter_atomic);
> > > +
> > >  static void pipe_advance(struct iov_iter *i, size_t size)
> > >  {
> > >  	struct pipe_inode_info *pipe = i->pipe;
> > > --
> > > 2.39.2
> > >
> >
>
Baoquan He March 22, 2023, 1:21 p.m. UTC | #5
On 03/22/23 at 11:06am, Lorenzo Stoakes wrote:
> On Wed, Mar 22, 2023 at 10:32:47AM +0000, Lorenzo Stoakes wrote:
> > > I am a little confused about the name of this new function. In its
> > > conterpart, copy_page_from_iter_atomic(), kmap_atomic()/kunmpa_atomic()
> > > are used. With them, if CONFIG_HIGHMEM=n, it's like below:
> >
> > The reason for this is that:-
> >
> > 1. kmap_atomic() explicitly states that it is now deprecated and must no longer
> >    be used, and kmap_local_page() should be used instead:-
> >
> >  * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
> >
> >  * Do not use in new code. Use kmap_local_page() instead.
> >
> > 2. kmap_local_page() explicitly states that it can be used in any context:-
> >
> >  * Can be invoked from any context, including interrupts.
> >
> > I wanted follow this advice as strictly as I could, hence the change. However,
> > we do need preemption/pagefaults explicitly disabled in this context (we are
> > happy to fail if the faulted in pages are unmapped in meantime), and I didn't
> > check the internals to make sure.
> >
> > So I think for safety it is better to use k[un]map_atomic() here, I'll respin
> > and put that back in, good catch!
> >
> 
> Actually, given we have preemption disabled due to the held spinlock, I think
> it'd be better to add a copy_page_to_iter_nofault() that uses
> copy_to_user_nofault() which will disable pagefaults thus have exactly the
> equivalent behaviour, more explicitly and without the use of a deprecated
> function.

Sounds like a great idea; that lets us avoid using kmap_atomic().

> 
> > >
> > > static inline void *kmap_atomic(struct page *page)
> > > {
> > >         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> > >                 migrate_disable();
> > >         else
> > >                 preempt_disable();
> > >         pagefault_disable();
> > >         return page_address(page);
> > > }
> > >
> > > But kmap_local_page() is only having page_address(), the code block
> > > between kmap_local_page() and kunmap_local() is also atomic, it's a
> > > little messy in my mind.
> > >
> > > static inline void *kmap_local_page(struct page *page)
> > > {
> > >         return page_address(page);
> > > }
> > >
> > > > +	char *p = kaddr + offset;
> > > > +	size_t copied = 0;
> > > > +
> > > > +	if (!page_copy_sane(page, offset, bytes) ||
> > > > +	    WARN_ON_ONCE(i->data_source))
> > > > +		goto out;
> > > > +
> > > > +	if (unlikely(iov_iter_is_pipe(i))) {
> > > > +		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	iterate_and_advance(i, bytes, base, len, off,
> > > > +		copyout(base, p + off, len),
> > > > +		memcpy(base, p + off, len)
> > > > +	)
> > > > +	copied = bytes;
> > > > +
> > > > +out:
> > > > +	kunmap_local(kaddr);
> > > > +	return copied;
> > > > +}
> > > > +EXPORT_SYMBOL(copy_page_to_iter_atomic);
> > > > +
> > > >  static void pipe_advance(struct iov_iter *i, size_t size)
> > > >  {
> > > >  	struct pipe_inode_info *pipe = i->pipe;
> > > > --
> > > > 2.39.2
> > > >
> > >
>
diff mbox series

Patch

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 27e3fd942960..fab07103090f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -154,6 +154,8 @@  static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 
 size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
 				  size_t bytes, struct iov_iter *i);
+size_t copy_page_to_iter_atomic(struct page *page, unsigned offset,
+				size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 274014e4eafe..48ca1c5dfc04 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -821,6 +821,34 @@  size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
 }
 EXPORT_SYMBOL(copy_page_from_iter_atomic);
 
+size_t copy_page_to_iter_atomic(struct page *page, unsigned offset, size_t bytes,
+				struct iov_iter *i)
+{
+	char *kaddr = kmap_local_page(page);
+	char *p = kaddr + offset;
+	size_t copied = 0;
+
+	if (!page_copy_sane(page, offset, bytes) ||
+	    WARN_ON_ONCE(i->data_source))
+		goto out;
+
+	if (unlikely(iov_iter_is_pipe(i))) {
+		copied = copy_page_to_iter_pipe(page, offset, bytes, i);
+		goto out;
+	}
+
+	iterate_and_advance(i, bytes, base, len, off,
+		copyout(base, p + off, len),
+		memcpy(base, p + off, len)
+	)
+	copied = bytes;
+
+out:
+	kunmap_local(kaddr);
+	return copied;
+}
+EXPORT_SYMBOL(copy_page_to_iter_atomic);
+
 static void pipe_advance(struct iov_iter *i, size_t size)
 {
 	struct pipe_inode_info *pipe = i->pipe;