diff mbox series

[2/4] tee: Remove vmalloc page support

Message ID 20221002002326.946620-3-ira.weiny@intel.com (mailing list archive)
State New
Headers show
Series Remove get_kernel_pages() | expand

Commit Message

Ira Weiny Oct. 2, 2022, 12:23 a.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

The kernel pages used by shm_get_kernel_pages() are allocated using
GFP_KERNEL through the following call stack:

trusted_instantiate()
	trusted_payload_alloc() -> GFP_KERNEL
	<trusted key op>
		tee_shm_register_kernel_buf()
			register_shm_helper()
				shm_get_kernel_pages()

Where <trusted key op> is one of:

	trusted_key_unseal()
	trusted_key_get_random()
	trusted_key_seal()

Remove the vmalloc page support from shm_get_kernel_pages().  Replace
with a warn on once.

Cc: Jens Wiklander <jens.wiklander@linaro.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Jens I went with the suggestion from Linus and Christoph and rejected
vmalloc addresses.  I did not hear back from you regarding Linus'
question if the vmalloc page support was required by an up coming patch
set or not.  So I assumed it was something out of tree.
---
 drivers/tee/tee_shm.c | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

Comments

Jens Wiklander Oct. 3, 2022, 6:41 a.m. UTC | #1
Hi Ira,

On Sun, Oct 2, 2022 at 2:23 AM <ira.weiny@intel.com> wrote:
>
> From: Ira Weiny <ira.weiny@intel.com>
>
> The kernel pages used by shm_get_kernel_pages() are allocated using
> GFP_KERNEL through the following call stack:
>
> trusted_instantiate()
>         trusted_payload_alloc() -> GFP_KERNEL
>         <trusted key op>
>                 tee_shm_register_kernel_buf()
>                         register_shm_helper()
>                                 shm_get_kernel_pages()
>
> Where <trusted key op> is one of:
>
>         trusted_key_unseal()
>         trusted_key_get_random()
>         trusted_key_seal()
>
> Remove the vmalloc page support from shm_get_kernel_pages().  Replace
> with a warn on once.
>
> Cc: Jens Wiklander <jens.wiklander@linaro.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
> ---
> Jens I went with the suggestion from Linus and Christoph and rejected
> vmalloc addresses.  I did not hear back from you regarding Linus'
> question if the vmalloc page support was required by an up coming patch
> set or not.  So I assumed it was something out of tree.

Yes, that's correctly assumed, sorry for not confirming that earlier.

Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>

Thanks,
Jens

> ---
>  drivers/tee/tee_shm.c | 36 ++++++++++++------------------------
>  1 file changed, 12 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
> index 27295bda3e0b..527a6eabc03e 100644
> --- a/drivers/tee/tee_shm.c
> +++ b/drivers/tee/tee_shm.c
> @@ -24,37 +24,25 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
>  static int shm_get_kernel_pages(unsigned long start, size_t page_count,
>                                 struct page **pages)
>  {
> +       struct kvec *kiov;
>         size_t n;
>         int rc;
>
> -       if (is_vmalloc_addr((void *)start)) {
> -               struct page *page;
> -
> -               for (n = 0; n < page_count; n++) {
> -                       page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
> -                       if (!page)
> -                               return -ENOMEM;
> -
> -                       get_page(page);
> -                       pages[n] = page;
> -               }
> -               rc = page_count;
> -       } else {
> -               struct kvec *kiov;
> -
> -               kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> -               if (!kiov)
> -                       return -ENOMEM;
> +       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start)))
> +               return -EINVAL;
>
> -               for (n = 0; n < page_count; n++) {
> -                       kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> -                       kiov[n].iov_len = PAGE_SIZE;
> -               }
> +       kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> +       if (!kiov)
> +               return -ENOMEM;
>
> -               rc = get_kernel_pages(kiov, page_count, 0, pages);
> -               kfree(kiov);
> +       for (n = 0; n < page_count; n++) {
> +               kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> +               kiov[n].iov_len = PAGE_SIZE;
>         }
>
> +       rc = get_kernel_pages(kiov, page_count, 0, pages);
> +       kfree(kiov);
> +
>         return rc;
>  }
>
> --
> 2.37.2
>
Sumit Garg Oct. 3, 2022, 6:57 a.m. UTC | #2
+ Phil

Hi Ira,

On Sun, 2 Oct 2022 at 05:53, <ira.weiny@intel.com> wrote:
>
> From: Ira Weiny <ira.weiny@intel.com>
>
> The kernel pages used by shm_get_kernel_pages() are allocated using
> GFP_KERNEL through the following call stack:
>
> trusted_instantiate()
>         trusted_payload_alloc() -> GFP_KERNEL
>         <trusted key op>
>                 tee_shm_register_kernel_buf()
>                         register_shm_helper()
>                                 shm_get_kernel_pages()
>
> Where <trusted key op> is one of:
>
>         trusted_key_unseal()
>         trusted_key_get_random()
>         trusted_key_seal()
>
> Remove the vmalloc page support from shm_get_kernel_pages().  Replace
> with a warn on once.
>
> Cc: Jens Wiklander <jens.wiklander@linaro.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
> ---
> Jens I went with the suggestion from Linus and Christoph and rejected
> vmalloc addresses.  I did not hear back from you regarding Linus'
> question if the vmalloc page support was required by an up coming patch
> set or not.  So I assumed it was something out of tree.

It looks like I wasn't CC'd to that conversation. IIRC, support for
vmalloc addresses was added recently by Phil here [1]. So I would like
to give him a chance if he is planning to post a corresponding kernel
driver upstream.

[1] https://lists.trustedfirmware.org/archives/list/op-tee@lists.trustedfirmware.org/thread/M7HI3P2M66V27SK35CGQRICZ7DJZ5J2W/

-Sumit

> ---
>  drivers/tee/tee_shm.c | 36 ++++++++++++------------------------
>  1 file changed, 12 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
> index 27295bda3e0b..527a6eabc03e 100644
> --- a/drivers/tee/tee_shm.c
> +++ b/drivers/tee/tee_shm.c
> @@ -24,37 +24,25 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
>  static int shm_get_kernel_pages(unsigned long start, size_t page_count,
>                                 struct page **pages)
>  {
> +       struct kvec *kiov;
>         size_t n;
>         int rc;
>
> -       if (is_vmalloc_addr((void *)start)) {
> -               struct page *page;
> -
> -               for (n = 0; n < page_count; n++) {
> -                       page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
> -                       if (!page)
> -                               return -ENOMEM;
> -
> -                       get_page(page);
> -                       pages[n] = page;
> -               }
> -               rc = page_count;
> -       } else {
> -               struct kvec *kiov;
> -
> -               kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> -               if (!kiov)
> -                       return -ENOMEM;
> +       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start)))
> +               return -EINVAL;
>
> -               for (n = 0; n < page_count; n++) {
> -                       kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> -                       kiov[n].iov_len = PAGE_SIZE;
> -               }
> +       kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> +       if (!kiov)
> +               return -ENOMEM;
>
> -               rc = get_kernel_pages(kiov, page_count, 0, pages);
> -               kfree(kiov);
> +       for (n = 0; n < page_count; n++) {
> +               kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> +               kiov[n].iov_len = PAGE_SIZE;
>         }
>
> +       rc = get_kernel_pages(kiov, page_count, 0, pages);
> +       kfree(kiov);
> +
>         return rc;
>  }
>
> --
> 2.37.2
>
Phil Chang (張世勳) Oct. 5, 2022, 3:28 a.m. UTC | #3
Hi Sumit

Thanks for mentioning that, in fact, our product is low memory devices, and continuous pages are extremely valuable.
Although our driver is not upstream yet but highly dependent on tee shm vmalloc support,
some scenarios are driver alloc high order pages but system memory is fragmentation so that alloc failed.
In this situation, vmalloc support is important and gives flexible usage to user.


-----Original Message-----
From: Sumit Garg <sumit.garg@linaro.org> 
Sent: Monday, October 3, 2022 2:57 PM
To: ira.weiny@intel.com
Cc: Jens Wiklander <jens.wiklander@linaro.org>; Andrew Morton <akpm@linux-foundation.org>; Al Viro <viro@zeniv.linux.org.uk>; Fabio M. De Francesco <fmdefrancesco@gmail.com>; Christoph Hellwig <hch@lst.de>; Linus Torvalds <torvalds@linux-foundation.org>; op-tee@lists.trustedfirmware.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; Phil Chang (張世勳) <Phil.Chang@mediatek.com>
Subject: Re: [PATCH 2/4] tee: Remove vmalloc page support

+ Phil

Hi Ira,

On Sun, 2 Oct 2022 at 05:53, <ira.weiny@intel.com> wrote:
>
> From: Ira Weiny <ira.weiny@intel.com>
>
> The kernel pages used by shm_get_kernel_pages() are allocated using 
> GFP_KERNEL through the following call stack:
>
> trusted_instantiate()
>         trusted_payload_alloc() -> GFP_KERNEL
>         <trusted key op>
>                 tee_shm_register_kernel_buf()
>                         register_shm_helper()
>                                 shm_get_kernel_pages()
>
> Where <trusted key op> is one of:
>
>         trusted_key_unseal()
>         trusted_key_get_random()
>         trusted_key_seal()
>
> Remove the vmalloc page support from shm_get_kernel_pages().  Replace 
> with a warn on once.
>
> Cc: Jens Wiklander <jens.wiklander@linaro.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
> ---
> Jens I went with the suggestion from Linus and Christoph and rejected 
> vmalloc addresses.  I did not hear back from you regarding Linus'
> question if the vmalloc page support was required by an up coming 
> patch set or not.  So I assumed it was something out of tree.

It looks like I wasn't CC'd to that conversation. IIRC, support for vmalloc addresses was added recently by Phil here [1]. So I would like to give him a chance if he is planning to post a corresponding kernel driver upstream.

[1] https://urldefense.com/v3/__https://lists.trustedfirmware.org/archives/list/op-tee@lists.trustedfirmware.org/thread/M7HI3P2M66V27SK35CGQRICZ7DJZ5J2W/__;!!CTRNKA9wMg0ARbw!wGOKR9k3khZJlPt1K_xBCXX4EBM5ZCfWKuruFgSP45H8wTvJrx4_St3Fb5ZrljD5QQ$ 

-Sumit

> ---
>  drivers/tee/tee_shm.c | 36 ++++++++++++------------------------
>  1 file changed, 12 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 
> 27295bda3e0b..527a6eabc03e 100644
> --- a/drivers/tee/tee_shm.c
> +++ b/drivers/tee/tee_shm.c
> @@ -24,37 +24,25 @@ static void shm_put_kernel_pages(struct page 
> **pages, size_t page_count)  static int shm_get_kernel_pages(unsigned long start, size_t page_count,
>                                 struct page **pages)  {
> +       struct kvec *kiov;
>         size_t n;
>         int rc;
>
> -       if (is_vmalloc_addr((void *)start)) {
> -               struct page *page;
> -
> -               for (n = 0; n < page_count; n++) {
> -                       page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
> -                       if (!page)
> -                               return -ENOMEM;
> -
> -                       get_page(page);
> -                       pages[n] = page;
> -               }
> -               rc = page_count;
> -       } else {
> -               struct kvec *kiov;
> -
> -               kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> -               if (!kiov)
> -                       return -ENOMEM;
> +       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start)))
> +               return -EINVAL;
>
> -               for (n = 0; n < page_count; n++) {
> -                       kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> -                       kiov[n].iov_len = PAGE_SIZE;
> -               }
> +       kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> +       if (!kiov)
> +               return -ENOMEM;
>
> -               rc = get_kernel_pages(kiov, page_count, 0, pages);
> -               kfree(kiov);
> +       for (n = 0; n < page_count; n++) {
> +               kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> +               kiov[n].iov_len = PAGE_SIZE;
>         }
>
> +       rc = get_kernel_pages(kiov, page_count, 0, pages);
> +       kfree(kiov);
> +
>         return rc;
>  }
>
> --
> 2.37.2
>
Sumit Garg Oct. 6, 2022, 6:23 a.m. UTC | #4
Hi Phil,

Please don't top-post in the OSS mailing list.

On Wed, 5 Oct 2022 at 08:59, Phil Chang (張世勳) <Phil.Chang@mediatek.com> wrote:
>
> Hi Sumit
>
> Thanks for mentioning that, in fact, our product is low memory devices, and continuous pages are extremely valuable.
> Although our driver is not upstream yet but highly dependent on tee shm vmalloc support,

Sorry but you need to get your driver mainline in order to support
vmalloc interface. As otherwise it's a maintenance nightmare to
support interfaces in the mainline for out-of-tree drivers.

-Sumit

> some scenarios are driver alloc high order pages but system memory is fragmentation so that alloc failed.
> In this situation, vmalloc support is important and gives flexible usage to user.
>
>
> -----Original Message-----
> From: Sumit Garg <sumit.garg@linaro.org>
> Sent: Monday, October 3, 2022 2:57 PM
> To: ira.weiny@intel.com
> Cc: Jens Wiklander <jens.wiklander@linaro.org>; Andrew Morton <akpm@linux-foundation.org>; Al Viro <viro@zeniv.linux.org.uk>; Fabio M. De Francesco <fmdefrancesco@gmail.com>; Christoph Hellwig <hch@lst.de>; Linus Torvalds <torvalds@linux-foundation.org>; op-tee@lists.trustedfirmware.org; linux-kernel@vger.kernel.org; linux-mm@kvack.org; Phil Chang (張世勳) <Phil.Chang@mediatek.com>
> Subject: Re: [PATCH 2/4] tee: Remove vmalloc page support
>
> + Phil
>
> Hi Ira,
>
> On Sun, 2 Oct 2022 at 05:53, <ira.weiny@intel.com> wrote:
> >
> > From: Ira Weiny <ira.weiny@intel.com>
> >
> > The kernel pages used by shm_get_kernel_pages() are allocated using
> > GFP_KERNEL through the following call stack:
> >
> > trusted_instantiate()
> >         trusted_payload_alloc() -> GFP_KERNEL
> >         <trusted key op>
> >                 tee_shm_register_kernel_buf()
> >                         register_shm_helper()
> >                                 shm_get_kernel_pages()
> >
> > Where <trusted key op> is one of:
> >
> >         trusted_key_unseal()
> >         trusted_key_get_random()
> >         trusted_key_seal()
> >
> > Remove the vmalloc page support from shm_get_kernel_pages().  Replace
> > with a warn on once.
> >
> > Cc: Jens Wiklander <jens.wiklander@linaro.org>
> > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Linus Torvalds <torvalds@linux-foundation.org>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> >
> > ---
> > Jens I went with the suggestion from Linus and Christoph and rejected
> > vmalloc addresses.  I did not hear back from you regarding Linus'
> > question if the vmalloc page support was required by an up coming
> > patch set or not.  So I assumed it was something out of tree.
>
> It looks like I wasn't CC'd to that conversation. IIRC, support for vmalloc addresses was added recently by Phil here [1]. So I would like to give him a chance if he is planning to post a corresponding kernel driver upstream.
>
> [1] https://urldefense.com/v3/__https://lists.trustedfirmware.org/archives/list/op-tee@lists.trustedfirmware.org/thread/M7HI3P2M66V27SK35CGQRICZ7DJZ5J2W/__;!!CTRNKA9wMg0ARbw!wGOKR9k3khZJlPt1K_xBCXX4EBM5ZCfWKuruFgSP45H8wTvJrx4_St3Fb5ZrljD5QQ$
>
> -Sumit
>
> > ---
> >  drivers/tee/tee_shm.c | 36 ++++++++++++------------------------
> >  1 file changed, 12 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index
> > 27295bda3e0b..527a6eabc03e 100644
> > --- a/drivers/tee/tee_shm.c
> > +++ b/drivers/tee/tee_shm.c
> > @@ -24,37 +24,25 @@ static void shm_put_kernel_pages(struct page
> > **pages, size_t page_count)  static int shm_get_kernel_pages(unsigned long start, size_t page_count,
> >                                 struct page **pages)  {
> > +       struct kvec *kiov;
> >         size_t n;
> >         int rc;
> >
> > -       if (is_vmalloc_addr((void *)start)) {
> > -               struct page *page;
> > -
> > -               for (n = 0; n < page_count; n++) {
> > -                       page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
> > -                       if (!page)
> > -                               return -ENOMEM;
> > -
> > -                       get_page(page);
> > -                       pages[n] = page;
> > -               }
> > -               rc = page_count;
> > -       } else {
> > -               struct kvec *kiov;
> > -
> > -               kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> > -               if (!kiov)
> > -                       return -ENOMEM;
> > +       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start)))
> > +               return -EINVAL;
> >
> > -               for (n = 0; n < page_count; n++) {
> > -                       kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> > -                       kiov[n].iov_len = PAGE_SIZE;
> > -               }
> > +       kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
> > +       if (!kiov)
> > +               return -ENOMEM;
> >
> > -               rc = get_kernel_pages(kiov, page_count, 0, pages);
> > -               kfree(kiov);
> > +       for (n = 0; n < page_count; n++) {
> > +               kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
> > +               kiov[n].iov_len = PAGE_SIZE;
> >         }
> >
> > +       rc = get_kernel_pages(kiov, page_count, 0, pages);
> > +       kfree(kiov);
> > +
> >         return rc;
> >  }
> >
> > --
> > 2.37.2
> >
Ira Weiny Oct. 6, 2022, 6:19 p.m. UTC | #5
On Thu, Oct 06, 2022 at 11:53:55AM +0530, Sumit Garg wrote:
> Hi Phil,
> 
> Please don't top-post in the OSS mailing list.
> 
> On Wed, 5 Oct 2022 at 08:59, Phil Chang (張世勳) <Phil.Chang@mediatek.com> wrote:
> >
> > Hi Sumit
> >
> > Thanks for mentioning that, in fact, our product is low memory devices, and continuous pages are extremely valuable.
> > Although our driver is not upstream yet but highly dependent on tee shm vmalloc support,
> 
> Sorry but you need to get your driver mainline in order to support
> vmalloc interface. As otherwise it's a maintenance nightmare to
> support interfaces in the mainline for out-of-tree drivers.

I agree.  It sounds like this driver is not going to be submitted any time
soon.  So I'll respin this series as is.  But I do have a couple of other
things to deal with first.  So if I'm wrong let me know soon.

Thanks,
Ira
Linus Torvalds Oct. 6, 2022, 6:20 p.m. UTC | #6
On Wed, Oct 5, 2022 at 11:24 PM Sumit Garg <sumit.garg@linaro.org> wrote:
>
> Sorry but you need to get your driver mainline in order to support
> vmalloc interface.

Actually, I think even then we shouldn't support vmalloc - and
register_shm_helper() just needs to be changed to pass in an array of
actual page pointers instead.

At that point TEE_SHM_USER_MAPPED should also go away, because then
it's the caller that should just do either the user space page
pinning, or pass in the kernel page pointer.

JensW, is there some reason that wouldn't work?

                 Linus
Jens Wiklander Oct. 7, 2022, 8:12 a.m. UTC | #7
On Thu, Oct 6, 2022 at 8:20 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> On Wed, Oct 5, 2022 at 11:24 PM Sumit Garg <sumit.garg@linaro.org> wrote:
> >
> > Sorry but you need to get your driver mainline in order to support
> > vmalloc interface.
>
> Actually, I think even then we shouldn't support vmalloc - and
> register_shm_helper() just needs to be changed to pass in an array of
> actual page pointers instead.

register_shm_helper() is an internal function, I suppose it's what's
passed to tee_shm_register_user_buf() and especially
tee_shm_register_kernel_buf() in this case.

So the gain is that in the kernel it becomes the caller's
responsibility to provide the array of page pointers and the TEE
subsystem doesn't need to care about what kind of kernel memory it is
any longer. Yes, that should avoid eventual complexities with
vmalloc() etc.

>
> At that point TEE_SHM_USER_MAPPED should also go away, because then
> it's the caller that should just do either the user space page
> pinning, or pass in the kernel page pointer.
>
> JensW, is there some reason that wouldn't work?

We still need to know if it's kernel or user pages in
release_registered_pages().

The struct tee_shm:s acquired with syscalls from user space are
reference counted. As are the kernel tee_shm:s, but I believe we could
separate them to avoid reference counting tee_shm:s used by kernel
clients if needed. I'll need to look closer at this if we're going to
use that approach.

Without reference counting the caller of tee_shm_free() can be certain
that the secure world is done with the memory so we could delegate the
kernel pages part of release_registered_pages() to the caller instead.

Cheers,
Jens

>
>                  Linus
Christoph Hellwig Oct. 10, 2022, 7:42 a.m. UTC | #8
On Thu, Oct 06, 2022 at 11:20:16AM -0700, Linus Torvalds wrote:
> Actually, I think even then we shouldn't support vmalloc - and
> register_shm_helper() just needs to be changed to pass in an array of
> actual page pointers instead.
> 
> At that point TEE_SHM_USER_MAPPED should also go away, because then
> it's the caller that should just do either the user space page
> pinning, or pass in the kernel page pointer.
> 
> JensW, is there some reason that wouldn't work?

I suspect the best long term option would be to just pass an iov_iter..
Linus Torvalds Oct. 10, 2022, 5:20 p.m. UTC | #9
On Mon, Oct 10, 2022 at 12:42 AM Christoph Hellwig <hch@lst.de> wrote:
>
> I suspect the best long term option would be to just pass an iov_iter..

Hmm. Yeah, that sounds like a workable model, and solves the problem
JensW pointed out with my simplistic "just pass a page array" approach
where you also need to keep track of how to release things.

                   Linus
Al Viro Oct. 10, 2022, 5:57 p.m. UTC | #10
On Mon, Oct 10, 2022 at 10:20:15AM -0700, Linus Torvalds wrote:
> On Mon, Oct 10, 2022 at 12:42 AM Christoph Hellwig <hch@lst.de> wrote:
> >
> > I suspect the best long term option would be to just pass an iov_iter..
> 
> Hmm. Yeah, that sounds like a workable model, and solves the problem
> JensW pointed out with my simplistic "just pass a page array" approach
> where you also need to keep track of how to release things.

Except that then you need to get iov_iter_get_pages analogue that would
work for ITER_KVEC, which is exact same problem right back.
Ira Weiny Dec. 16, 2022, 12:41 a.m. UTC | #11
On Fri, Oct 07, 2022 at 10:12:57AM +0200, Jens Wiklander wrote:
> On Thu, Oct 6, 2022 at 8:20 PM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > On Wed, Oct 5, 2022 at 11:24 PM Sumit Garg <sumit.garg@linaro.org> wrote:
> > >
> > > Sorry but you need to get your driver mainline in order to support
> > > vmalloc interface.
> >
> > Actually, I think even then we shouldn't support vmalloc - and
> > register_shm_helper() just needs to be changed to pass in an array of
> > actual page pointers instead.
> 
> register_shm_helper() is an internal function, I suppose it's what's
> passed to tee_shm_register_user_buf() and especially
> tee_shm_register_kernel_buf() in this case.
> 
> So the gain is that in the kernel it becomes the caller's
> responsibility to provide the array of page pointers and the TEE
> subsystem doesn't need to care about what kind of kernel memory it is
> any longer. Yes, that should avoid eventual complexities with
> vmalloc() etc.

I finally spent some time digging into this again.

Overall I'm not opposed to trying to clean up the code more but I feel like the
removal of TEE_SHM_USER_MAPPED is too complex for the main goal; to remove a
caller of kmap_to_page().

Not only is that flag used in release_registered_pages() but it is also used in
tee_shm_fop_mmap().  I'm not following exactly why.  I think this is to allow
mmap of the tee_shm's allocated by kernel users?  Which I _think_ is
orthogonal to the callers of tee_shm_register_kernel_buf()?

> 
> >
> > At that point TEE_SHM_USER_MAPPED should also go away, because then
> > it's the caller that should just do either the user space page
> > pinning, or pass in the kernel page pointer.
> >
> > JensW, is there some reason that wouldn't work?
> 
> We still need to know if it's kernel or user pages in
> release_registered_pages().

Yes.

As I dug into this it seemed ok to define a tee_shm_kernel_free().  Pull out
the allocation of the page array from register_shm_helper() such that it could
be handled by tee_shm_register_kernel_buf() and this new tee_shm_kernel_free().

This seems reasonable because the only callers of tee_shm_register_kernel_buf()
are in trusted_tee.c and they all call tee_shm_free() on the registered memory
prior to returning.

Other callers[*] of tee_shm_free() obtained tee_shm from
tee_shm_alloc_kernel_buf() which AFAICT avoids all this nonsense.

[*] such as .../drivers/firmware/broadcom/tee_bnxt_fw.c.

> 
> The struct tee_shm:s acquired with syscalls from user space are
> reference counted. As are the kernel tee_shm:s, but I believe we could
> separate them to avoid reference counting tee_shm:s used by kernel
> clients if needed. I'll need to look closer at this if we're going to
> use that approach.
> 
> Without reference counting the caller of tee_shm_free() can be certain
> that the secure world is done with the memory so we could delegate the
> kernel pages part of release_registered_pages() to the caller instead.
> 

I'm not sure I follow you here.  Would this be along the lines of creating a
tee_shm_free_kernel() to be used in trusted_tee.c for those specific kernel
data?

Overall I feel like submitting this series again with Christoph and Al's
comments addressed is the best way forward to get rid of kmap_to_page().  I
would really like to get moving on that to avoid any further issues with the
kmap conversions.

But if folks feel strongly enough about removing that flag I can certainly try
to do so.

Ira

> Cheers,
> Jens
> 
> >
> >                  Linus
Sumit Garg Dec. 16, 2022, 5:09 a.m. UTC | #12
On Fri, 16 Dec 2022 at 06:11, Ira Weiny <ira.weiny@intel.com> wrote:
>
> On Fri, Oct 07, 2022 at 10:12:57AM +0200, Jens Wiklander wrote:
> > On Thu, Oct 6, 2022 at 8:20 PM Linus Torvalds
> > <torvalds@linux-foundation.org> wrote:
> > >
> > > On Wed, Oct 5, 2022 at 11:24 PM Sumit Garg <sumit.garg@linaro.org> wrote:
> > > >
> > > > Sorry but you need to get your driver mainline in order to support
> > > > vmalloc interface.
> > >
> > > Actually, I think even then we shouldn't support vmalloc - and
> > > register_shm_helper() just needs to be changed to pass in an array of
> > > actual page pointers instead.
> >
> > register_shm_helper() is an internal function, I suppose it's what's
> > passed to tee_shm_register_user_buf() and especially
> > tee_shm_register_kernel_buf() in this case.
> >
> > So the gain is that in the kernel it becomes the caller's
> > responsibility to provide the array of page pointers and the TEE
> > subsystem doesn't need to care about what kind of kernel memory it is
> > any longer. Yes, that should avoid eventual complexities with
> > vmalloc() etc.
>
> I finally spent some time digging into this again.
>
> Overall I'm not opposed to trying to clean up the code more but I feel like the
> removal of TEE_SHM_USER_MAPPED is too complex for the main goal; to remove a
> caller of kmap_to_page().
>
> Not only is that flag used in release_registered_pages() but it is also used in
> tee_shm_fop_mmap().  I'm not following exactly why.  I think this is to allow
> mmap of the tee_shm's allocated by kernel users?

No, its rather to allow mmap of tee_shm allocated via
tee_shm_alloc_user_buf(). Have a look at its user-space usage here
[1]. So overall I agree here that we can't get rid of
TEE_SHM_USER_MAPPED completely as it has a valid mmap use-case.

[1] https://github.com/OP-TEE/optee_client/blob/master/libteec/src/tee_client_api.c#L907

>  Which I _think_ is
> orthogonal to the callers of tee_shm_register_kernel_buf()?
>
> >
> > >
> > > At that point TEE_SHM_USER_MAPPED should also go away, because then
> > > it's the caller that should just do either the user space page
> > > pinning, or pass in the kernel page pointer.
> > >
> > > JensW, is there some reason that wouldn't work?
> >
> > We still need to know if it's kernel or user pages in
> > release_registered_pages().
>
> Yes.
>
> As I dug into this it seemed ok to define a tee_shm_kernel_free().  Pull out
> the allocation of the page array from register_shm_helper() such that it could
> be handled by tee_shm_register_kernel_buf() and this new tee_shm_kernel_free().
>

+1

> This seems reasonable because the only callers of tee_shm_register_kernel_buf()
> are in trusted_tee.c and they all call tee_shm_free() on the registered memory
> prior to returning.
>
> Other callers[*] of tee_shm_free() obtained tee_shm from
> tee_shm_alloc_kernel_buf() which AFAICT avoids all this nonsense.
>
> [*] such as .../drivers/firmware/broadcom/tee_bnxt_fw.c.
>
> >
> > The struct tee_shm:s acquired with syscalls from user space are
> > reference counted. As are the kernel tee_shm:s, but I believe we could
> > separate them to avoid reference counting tee_shm:s used by kernel
> > clients if needed. I'll need to look closer at this if we're going to
> > use that approach.
> >
> > Without reference counting the caller of tee_shm_free() can be certain
> > that the secure world is done with the memory so we could delegate the
> > kernel pages part of release_registered_pages() to the caller instead.
> >
>
> I'm not sure I follow you here.  Would this be along the lines of creating a
> tee_shm_free_kernel() to be used in trusted_tee.c for those specific kernel
> data?

I can't find a reason/use-case for the TEE subsystem to keep a
refcount of memory registered by other kernel drivers like
trusted_tee.c. So yes I think it should be safe to directly free that
shm via tee_shm_free_kernel(). Also with that we can simplify shm
registration by kernel clients via directly passing page pointers to
tee_shm_register_kernel_buf() (I would rather rename this API as
tee_shm_register_kernel_pages()).

-Sumit

>
> Overall I feel like submitting this series again with Christoph and Al's
> comments addressed is the best way forward to get rid of kmap_to_page().  I
> would really like to get moving on that to avoid any further issues with the
> kmap conversions.
>
> But if folks feel strongly enough about removing that flag I can certainly try
> to do so.
>
> Ira
>
> > Cheers,
> > Jens
> >
> > >
> > >                  Linus
Christoph Hellwig Dec. 16, 2022, 7:06 a.m. UTC | #13
On Thu, Dec 15, 2022 at 04:41:04PM -0800, Ira Weiny wrote:
> Overall I feel like submitting this series again with Christoph and Al's
> comments addressed is the best way forward to get rid of kmap_to_page().  I
> would really like to get moving on that to avoid any further issues with the
> kmap conversions.

Yes.  While the flag is really odd, killing kmap_to_page should be the
priority.
Sumit Garg Dec. 16, 2022, 8:45 a.m. UTC | #14
On Fri, 16 Dec 2022 at 10:39, Sumit Garg <sumit.garg@linaro.org> wrote:
>
> On Fri, 16 Dec 2022 at 06:11, Ira Weiny <ira.weiny@intel.com> wrote:
> >
> > On Fri, Oct 07, 2022 at 10:12:57AM +0200, Jens Wiklander wrote:
> > > On Thu, Oct 6, 2022 at 8:20 PM Linus Torvalds
> > > <torvalds@linux-foundation.org> wrote:
> > > >
> > > > On Wed, Oct 5, 2022 at 11:24 PM Sumit Garg <sumit.garg@linaro.org> wrote:
> > > > >
> > > > > Sorry but you need to get your driver mainline in order to support
> > > > > vmalloc interface.
> > > >
> > > > Actually, I think even then we shouldn't support vmalloc - and
> > > > register_shm_helper() just needs to be changed to pass in an array of
> > > > actual page pointers instead.
> > >
> > > register_shm_helper() is an internal function, I suppose it's what's
> > > passed to tee_shm_register_user_buf() and especially
> > > tee_shm_register_kernel_buf() in this case.
> > >
> > > So the gain is that in the kernel it becomes the caller's
> > > responsibility to provide the array of page pointers and the TEE
> > > subsystem doesn't need to care about what kind of kernel memory it is
> > > any longer. Yes, that should avoid eventual complexities with
> > > vmalloc() etc.
> >
> > I finally spent some time digging into this again.
> >
> > Overall I'm not opposed to trying to clean up the code more but I feel like the
> > removal of TEE_SHM_USER_MAPPED is too complex for the main goal; to remove a
> > caller of kmap_to_page().
> >
> > Not only is that flag used in release_registered_pages() but it is also used in
> > tee_shm_fop_mmap().  I'm not following exactly why.  I think this is to allow
> > mmap of the tee_shm's allocated by kernel users?
>
> No, its rather to allow mmap of tee_shm allocated via
> tee_shm_alloc_user_buf(). Have a look at its user-space usage here
> [1]. So overall I agree here that we can't get rid of
> TEE_SHM_USER_MAPPED completely as it has a valid mmap use-case.
>
> [1] https://github.com/OP-TEE/optee_client/blob/master/libteec/src/tee_client_api.c#L907
>
> >  Which I _think_ is
> > orthogonal to the callers of tee_shm_register_kernel_buf()?
> >
> > >
> > > >
> > > > At that point TEE_SHM_USER_MAPPED should also go away, because then
> > > > it's the caller that should just do either the user space page
> > > > pinning, or pass in the kernel page pointer.
> > > >
> > > > JensW, is there some reason that wouldn't work?
> > >
> > > We still need to know if it's kernel or user pages in
> > > release_registered_pages().
> >
> > Yes.
> >
> > As I dug into this it seemed ok to define a tee_shm_kernel_free().  Pull out
> > the allocation of the page array from register_shm_helper() such that it could
> > be handled by tee_shm_register_kernel_buf() and this new tee_shm_kernel_free().
> >
>
> +1
>
> > This seems reasonable because the only callers of tee_shm_register_kernel_buf()
> > are in trusted_tee.c and they all call tee_shm_free() on the registered memory
> > prior to returning.
> >
> > Other callers[*] of tee_shm_free() obtained tee_shm from
> > tee_shm_alloc_kernel_buf() which AFAICT avoids all this nonsense.
> >
> > [*] such as .../drivers/firmware/broadcom/tee_bnxt_fw.c.
> >
> > >
> > > The struct tee_shm:s acquired with syscalls from user space are
> > > reference counted. As are the kernel tee_shm:s, but I believe we could
> > > separate them to avoid reference counting tee_shm:s used by kernel
> > > clients if needed. I'll need to look closer at this if we're going to
> > > use that approach.
> > >
> > > Without reference counting the caller of tee_shm_free() can be certain
> > > that the secure world is done with the memory so we could delegate the
> > > kernel pages part of release_registered_pages() to the caller instead.
> > >
> >
> > I'm not sure I follow you here.  Would this be along the lines of creating a
> > tee_shm_free_kernel() to be used in trusted_tee.c for those specific kernel
> > data?
>
> I can't find a reason/use-case for the TEE subsystem to keep a
> refcount of memory registered by other kernel drivers like
> trusted_tee.c. So yes I think it should be safe to directly free that
> shm via tee_shm_free_kernel(). Also with that we can simplify shm
> registration by kernel clients via directly passing page pointers to
> tee_shm_register_kernel_buf() (I would rather rename this API as
> tee_shm_register_kernel_pages()).

Okay, so I will take up this work and get rid of kmap_to_page
invocation from the TEE subsystem.

Ira,

You can then rebase your patchset over my work.

-Sumit

>
> >
> > Overall I feel like submitting this series again with Christoph and Al's
> > comments addressed is the best way forward to get rid of kmap_to_page().  I
> > would really like to get moving on that to avoid any further issues with the
> > kmap conversions.
> >
> > But if folks feel strongly enough about removing that flag I can certainly try
> > to do so.
> >
> > Ira
> >
> > > Cheers,
> > > Jens
> > >
> > > >
> > > >                  Linus
diff mbox series

Patch

diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 27295bda3e0b..527a6eabc03e 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -24,37 +24,25 @@  static void shm_put_kernel_pages(struct page **pages, size_t page_count)
 static int shm_get_kernel_pages(unsigned long start, size_t page_count,
 				struct page **pages)
 {
+	struct kvec *kiov;
 	size_t n;
 	int rc;
 
-	if (is_vmalloc_addr((void *)start)) {
-		struct page *page;
-
-		for (n = 0; n < page_count; n++) {
-			page = vmalloc_to_page((void *)(start + PAGE_SIZE * n));
-			if (!page)
-				return -ENOMEM;
-
-			get_page(page);
-			pages[n] = page;
-		}
-		rc = page_count;
-	} else {
-		struct kvec *kiov;
-
-		kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
-		if (!kiov)
-			return -ENOMEM;
+	if (WARN_ON_ONCE(is_vmalloc_addr((void *)start)))
+		return -EINVAL;
 
-		for (n = 0; n < page_count; n++) {
-			kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
-			kiov[n].iov_len = PAGE_SIZE;
-		}
+	kiov = kcalloc(page_count, sizeof(*kiov), GFP_KERNEL);
+	if (!kiov)
+		return -ENOMEM;
 
-		rc = get_kernel_pages(kiov, page_count, 0, pages);
-		kfree(kiov);
+	for (n = 0; n < page_count; n++) {
+		kiov[n].iov_base = (void *)(start + n * PAGE_SIZE);
+		kiov[n].iov_len = PAGE_SIZE;
 	}
 
+	rc = get_kernel_pages(kiov, page_count, 0, pages);
+	kfree(kiov);
+
 	return rc;
 }