Message ID | 20240227232100.478238-18-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | TDX/SNP part 1 of n, for 6.9 | expand |
On Tue, Feb 27, 2024, Paolo Bonzini wrote: This needs a changelog, and also needs to be Cc'd to someone(s) that can give it a thumbs up. > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > include/linux/pagemap.h | 2 ++ > mm/filemap.c | 4 ++++ > 2 files changed, 6 insertions(+) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index 2df35e65557d..e8ac0b32f84d 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, > * * %FGP_CREAT - If no folio is present then a new folio is allocated, > * added to the page cache and the VM's LRU list. The folio is > * returned locked. > + * * %FGP_CREAT_ONLY - Fail if a folio is not present > * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the > * folio is already in cache. If the folio was allocated, unlock it > * before returning so the caller can do the same dance. > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; > #define FGP_NOWAIT ((__force fgf_t)0x00000020) > #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) > #define FGP_STABLE ((__force fgf_t)0x00000080) > +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) > #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ > > #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) > diff --git a/mm/filemap.c b/mm/filemap.c > index 750e779c23db..d5107bd0cd09 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, > folio = NULL; > if (!folio) > goto no_page; > + if (fgp_flags & FGP_CREAT_ONLY) { > + folio_put(folio); > + return ERR_PTR(-EEXIST); > + } > > if (fgp_flags & FGP_LOCK) { > if (fgp_flags & FGP_NOWAIT) { > -- > 2.39.0 > >
On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote: > > On Tue, Feb 27, 2024, Paolo Bonzini wrote: > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it > a thumbs up. +Matthew Wilcox > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > --- > > include/linux/pagemap.h | 2 ++ > > mm/filemap.c | 4 ++++ > > 2 files changed, 6 insertions(+) > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > index 2df35e65557d..e8ac0b32f84d 100644 > > --- a/include/linux/pagemap.h > > +++ b/include/linux/pagemap.h > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, > > * * %FGP_CREAT - If no folio is present then a new folio is allocated, > > * added to the page cache and the VM's LRU list. The folio is > > * returned locked. > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present > > * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the > > * folio is already in cache. If the folio was allocated, unlock it > > * before returning so the caller can do the same dance. > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; > > #define FGP_NOWAIT ((__force fgf_t)0x00000020) > > #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) > > #define FGP_STABLE ((__force fgf_t)0x00000080) > > +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) > > #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ > > > > #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) > > diff --git a/mm/filemap.c b/mm/filemap.c > > index 750e779c23db..d5107bd0cd09 100644 > > --- a/mm/filemap.c > > +++ b/mm/filemap.c > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, > > folio = NULL; > > if (!folio) > > goto no_page; > > + if (fgp_flags & FGP_CREAT_ONLY) { > > + folio_put(folio); > > + return ERR_PTR(-EEXIST); > > + } > > > > if (fgp_flags & FGP_LOCK) { > > if (fgp_flags & FGP_NOWAIT) { > > -- > > 2.39.0 > > > > >
On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote: > On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote: > > > > On Tue, Feb 27, 2024, Paolo Bonzini wrote: > > > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it > > a thumbs up. > > +Matthew Wilcox If only there were an entry in MAINTAINERS for filemap.c ... This looks bogus to me, and if it's not bogus, it's incomplete. But it's hard to judge without a commit message that describes what it's supposed to mean. > > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > > --- > > > include/linux/pagemap.h | 2 ++ > > > mm/filemap.c | 4 ++++ > > > 2 files changed, 6 insertions(+) > > > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > > index 2df35e65557d..e8ac0b32f84d 100644 > > > --- a/include/linux/pagemap.h > > > +++ b/include/linux/pagemap.h > > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, > > > * * %FGP_CREAT - If no folio is present then a new folio is allocated, > > > * added to the page cache and the VM's LRU list. The folio is > > > * returned locked. > > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present > > > * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the > > > * folio is already in cache. If the folio was allocated, unlock it > > > * before returning so the caller can do the same dance. > > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; > > > #define FGP_NOWAIT ((__force fgf_t)0x00000020) > > > #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) > > > #define FGP_STABLE ((__force fgf_t)0x00000080) > > > +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) > > > #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ > > > > > > #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) > > > diff --git a/mm/filemap.c b/mm/filemap.c > > > index 750e779c23db..d5107bd0cd09 100644 > > > --- a/mm/filemap.c > > > +++ b/mm/filemap.c > > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, > > > folio = NULL; > > > if (!folio) > > > goto no_page; > > > + if (fgp_flags & FGP_CREAT_ONLY) { > > > + folio_put(folio); > > > + return ERR_PTR(-EEXIST); > > > + } > > > > > > if (fgp_flags & FGP_LOCK) { > > > if (fgp_flags & FGP_NOWAIT) { > > > -- > > > 2.39.0 > > > > > > > >
On Wed, Feb 28, 2024 at 2:15 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote: > > On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote: > > > > > > On Tue, Feb 27, 2024, Paolo Bonzini wrote: > > > > > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it > > > a thumbs up. > > > > +Matthew Wilcox > > If only there were an entry in MAINTAINERS for filemap.c ... Not CCing you (or mm in general) was intentional because I first wanted a review of the KVM APIs; of course I wouldn't have committed it without an Acked-by. But yeah, not writing the changelog yet was pure laziness. Since you're here: KVM would like to add a ioctl to encrypt and install a page into guest_memfd, in preparation for launching an encrypted guest. For this API we want to rule out the possibility of overwriting a page that is already in the guest_memfd's filemap, therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT into__filemap_get_folio. Do you think this is bogus... > This looks bogus to me, and if it's not bogus, it's incomplete. ... or if not, what incompleteness can you spot? Thanks, Paolo > But it's hard to judge without a commit message that describes what it's > supposed to mean. > > > > > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > > > --- > > > > include/linux/pagemap.h | 2 ++ > > > > mm/filemap.c | 4 ++++ > > > > 2 files changed, 6 insertions(+) > > > > > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > > > index 2df35e65557d..e8ac0b32f84d 100644 > > > > --- a/include/linux/pagemap.h > > > > +++ b/include/linux/pagemap.h > > > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, > > > > * * %FGP_CREAT - If no folio is present then a new folio is allocated, > > > > * added to the page cache and the VM's LRU list. The folio is > > > > * returned locked. > > > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present > > > > * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the > > > > * folio is already in cache. If the folio was allocated, unlock it > > > > * before returning so the caller can do the same dance. > > > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; > > > > #define FGP_NOWAIT ((__force fgf_t)0x00000020) > > > > #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) > > > > #define FGP_STABLE ((__force fgf_t)0x00000080) > > > > +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) > > > > #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ > > > > > > > > #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) > > > > diff --git a/mm/filemap.c b/mm/filemap.c > > > > index 750e779c23db..d5107bd0cd09 100644 > > > > --- a/mm/filemap.c > > > > +++ b/mm/filemap.c > > > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, > > > > folio = NULL; > > > > if (!folio) > > > > goto no_page; > > > > + if (fgp_flags & FGP_CREAT_ONLY) { > > > > + folio_put(folio); > > > > + return ERR_PTR(-EEXIST); > > > > + } > > > > > > > > if (fgp_flags & FGP_LOCK) { > > > > if (fgp_flags & FGP_NOWAIT) { > > > > -- > > > > 2.39.0 > > > > > > > > > > > >
On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote: > Since you're here: KVM would like to add a ioctl to encrypt and > install a page into guest_memfd, in preparation for launching an > encrypted guest. For this API we want to rule out the possibility of > overwriting a page that is already in the guest_memfd's filemap, > therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT > into__filemap_get_folio. Do you think this is bogus... Would it work to start out by either asserting the memfd is empty of pages, or by evicting any existing pages? Both those seem nicer than starting, realising you've got some unencrypted memory and aborting. > > This looks bogus to me, and if it's not bogus, it's incomplete. > > ... or if not, what incompleteness can you spot? The part where we race another caller passing FGP_CREAT_ONLY and one gets an EEXIST back from filemap_add_folio(). Maybe that's not something that can happen in your use case, but it's at least semantics that need documenting.
On Wed, Feb 28, 2024 at 8:24 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote: > > Since you're here: KVM would like to add a ioctl to encrypt and > > install a page into guest_memfd, in preparation for launching an > > encrypted guest. For this API we want to rule out the possibility of > > overwriting a page that is already in the guest_memfd's filemap, > > therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT > > into__filemap_get_folio. Do you think this is bogus... > > Would it work to start out by either asserting the memfd is empty of > pages, or by evicting any existing pages? Both those seem nicer than > starting, realising you've got some unencrypted memory and aborting. Unfortunately it would be quite ugly to force userspace to do all the initialization in one go. For example, there are different kinds of pages that probably would be initialized at different points (e.g. before vs. after vCPUs are created, because the initial vCPU state is also encrypted). The thing that I want to protect against is userspace trying to initialize the same encrypted page twice. > > > This looks bogus to me, and if it's not bogus, it's incomplete. > > > > ... or if not, what incompleteness can you spot? > > The part where we race another caller passing FGP_CREAT_ONLY and one gets > an EEXIST back from filemap_add_folio(). Maybe that's not something > that can happen in your use case, but it's at least semantics that > need documenting. From the point of view of filemap_add_folio(), one of the racers wins and one fails. It doesn't matter to filemap.c if the missing synchronization is in the kernel or in userspace. In the case of KVM, the ioctl will return the number of pages before it found an existing page, or -EEXIST if that number is zero (similar to what nonblocking read does with EAGAIN). I'll improve the documentation and changelog and make sure to Cc you on the next version. Thanks again! Paolo
On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote: > On Wed, Feb 28, 2024 at 2:15 PM Matthew Wilcox <willy@infradead.org> wrote: > > > > On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote: > > > On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote: > > > > > > > > On Tue, Feb 27, 2024, Paolo Bonzini wrote: > > > > > > > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it > > > > a thumbs up. > > > > > > +Matthew Wilcox > > > > If only there were an entry in MAINTAINERS for filemap.c ... > > Not CCing you (or mm in general) was intentional because I first > wanted a review of the KVM APIs; of course I wouldn't have committed > it without an Acked-by. But yeah, not writing the changelog yet was > pure laziness. > > Since you're here: KVM would like to add a ioctl to encrypt and > install a page into guest_memfd, in preparation for launching an > encrypted guest. For this API we want to rule out the possibility of > overwriting a page that is already in the guest_memfd's filemap, > therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT > into__filemap_get_folio. Do you think this is bogus... > > > This looks bogus to me, and if it's not bogus, it's incomplete. > > ... or if not, what incompleteness can you spot? > > Thanks, > > Paolo > > > But it's hard to judge without a commit message that describes what it's > > supposed to mean. > > > > > > > > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > > > > --- > > > > > include/linux/pagemap.h | 2 ++ > > > > > mm/filemap.c | 4 ++++ > > > > > 2 files changed, 6 insertions(+) > > > > > > > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > > > > > index 2df35e65557d..e8ac0b32f84d 100644 > > > > > --- a/include/linux/pagemap.h > > > > > +++ b/include/linux/pagemap.h > > > > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, > > > > > * * %FGP_CREAT - If no folio is present then a new folio is allocated, > > > > > * added to the page cache and the VM's LRU list. The folio is > > > > > * returned locked. > > > > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present ^ So should be: Fail if a folio is present. Thanks, Yilun > > > > > * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the > > > > > * folio is already in cache. If the folio was allocated, unlock it > > > > > * before returning so the caller can do the same dance. > > > > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; > > > > > #define FGP_NOWAIT ((__force fgf_t)0x00000020) > > > > > #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) > > > > > #define FGP_STABLE ((__force fgf_t)0x00000080) > > > > > +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) > > > > > #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ > > > > > > > > > > #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) > > > > > diff --git a/mm/filemap.c b/mm/filemap.c > > > > > index 750e779c23db..d5107bd0cd09 100644 > > > > > --- a/mm/filemap.c > > > > > +++ b/mm/filemap.c > > > > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, > > > > > folio = NULL; > > > > > if (!folio) > > > > > goto no_page; > > > > > + if (fgp_flags & FGP_CREAT_ONLY) { > > > > > + folio_put(folio); > > > > > + return ERR_PTR(-EEXIST); > > > > > + } > > > > > > > > > > if (fgp_flags & FGP_LOCK) { > > > > > if (fgp_flags & FGP_NOWAIT) { > > > > > -- > > > > > 2.39.0 > > > > > > > > > > > > > > > > > >
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..e8ac0b32f84d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, * * %FGP_CREAT - If no folio is present then a new folio is allocated, * added to the page cache and the VM's LRU list. The folio is * returned locked. + * * %FGP_CREAT_ONLY - Fail if a folio is not present * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * folio is already in cache. If the folio was allocated, unlock it * before returning so the caller can do the same dance. @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t; #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGP_CREAT_ONLY ((__force fgf_t)0x00000100) #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) diff --git a/mm/filemap.c b/mm/filemap.c index 750e779c23db..d5107bd0cd09 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, folio = NULL; if (!folio) goto no_page; + if (fgp_flags & FGP_CREAT_ONLY) { + folio_put(folio); + return ERR_PTR(-EEXIST); + } if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) {
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- include/linux/pagemap.h | 2 ++ mm/filemap.c | 4 ++++ 2 files changed, 6 insertions(+)