Message ID | 20240903232241.43995-7-anthony.yznaga@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add support for shared PTEs across processes | expand |
On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote: > > From: Khalid Aziz <khalid@kernel.org> > > Add a bit to vm_flags to indicate a vma shares PTEs with others. Add > a function to determine if a vma shares PTEs by checking this flag. > This is to be used to find the shared page table entries on page fault > for vmas sharing PTEs. > > Signed-off-by: Khalid Aziz <khalid@kernel.org> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com> > --- > include/linux/mm.h | 7 +++++++ > include/trace/events/mmflags.h | 3 +++ > mm/internal.h | 5 +++++ > 3 files changed, 15 insertions(+) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 6549d0979b28..3aa0b3322284 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp); > #define VM_DROPPABLE VM_NONE > #endif > > +#ifdef CONFIG_64BIT > +#define VM_SHARED_PT_BIT 41 > +#define VM_SHARED_PT BIT(VM_SHARED_PT_BIT) > +#else > +#define VM_SHARED_PT VM_NONE > +#endif > + > #ifdef CONFIG_64BIT > /* VM is sealed, in vm_flags */ > #define VM_SEALED _BITUL(63) > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h > index b63d211bd141..e1ae1e60d086 100644 > --- a/include/trace/events/mmflags.h > +++ b/include/trace/events/mmflags.h > @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3) > > #ifdef CONFIG_64BIT > # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, > +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name}, > #else > # define IF_HAVE_VM_DROPPABLE(flag, name) > +# define IF_HAVE_VM_SHARED_PT(flag, name) > #endif > > #define __def_vmaflag_names \ > @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ > {VM_HUGEPAGE, "hugepage" }, \ > {VM_NOHUGEPAGE, "nohugepage" }, \ > IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ > +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT, "sharedpt" ) \ > {VM_MERGEABLE, "mergeable" } \ > > #define show_vma_flags(flags) \ > diff --git a/mm/internal.h b/mm/internal.h > index b4d86436565b..8005d5956b6e 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *); > void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); > void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); > Hi Anthony, I'm really excited to see this series on the mailing list again! :) I won't have time to review this series in too much detail, but I hope something like it gets merged eventually. > +static inline bool vma_is_shared(const struct vm_area_struct *vma) > +{ > + return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT); > +} Tiny comment - I find vma_is_shared() to be a bit of a confusing name, especially given how vma_is_shared_maywrite() is defined. (Sorry if this has already been discussed before.) How about vma_is_shared_pt()?
On 9/3/24 4:40 PM, James Houghton wrote: > On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote: >> From: Khalid Aziz <khalid@kernel.org> >> >> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add >> a function to determine if a vma shares PTEs by checking this flag. >> This is to be used to find the shared page table entries on page fault >> for vmas sharing PTEs. >> >> Signed-off-by: Khalid Aziz <khalid@kernel.org> >> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com> >> --- >> include/linux/mm.h | 7 +++++++ >> include/trace/events/mmflags.h | 3 +++ >> mm/internal.h | 5 +++++ >> 3 files changed, 15 insertions(+) >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index 6549d0979b28..3aa0b3322284 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp); >> #define VM_DROPPABLE VM_NONE >> #endif >> >> +#ifdef CONFIG_64BIT >> +#define VM_SHARED_PT_BIT 41 >> +#define VM_SHARED_PT BIT(VM_SHARED_PT_BIT) >> +#else >> +#define VM_SHARED_PT VM_NONE >> +#endif >> + >> #ifdef CONFIG_64BIT >> /* VM is sealed, in vm_flags */ >> #define VM_SEALED _BITUL(63) >> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h >> index b63d211bd141..e1ae1e60d086 100644 >> --- a/include/trace/events/mmflags.h >> +++ b/include/trace/events/mmflags.h >> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3) >> >> #ifdef CONFIG_64BIT >> # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, >> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name}, >> #else >> # define IF_HAVE_VM_DROPPABLE(flag, name) >> +# define IF_HAVE_VM_SHARED_PT(flag, name) >> #endif >> >> #define __def_vmaflag_names \ >> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ >> {VM_HUGEPAGE, "hugepage" }, \ >> {VM_NOHUGEPAGE, "nohugepage" }, \ >> IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ >> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT, "sharedpt" ) \ >> {VM_MERGEABLE, "mergeable" } \ >> >> #define show_vma_flags(flags) \ >> diff --git a/mm/internal.h b/mm/internal.h >> index b4d86436565b..8005d5956b6e 100644 >> --- a/mm/internal.h >> +++ b/mm/internal.h >> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *); >> void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); >> void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); >> Hi James, > Hi Anthony, > > I'm really excited to see this series on the mailing list again! :) I > won't have time to review this series in too much detail, but I hope > something like it gets merged eventually. Me, too. :-) > >> +static inline bool vma_is_shared(const struct vm_area_struct *vma) >> +{ >> + return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT); >> +} > Tiny comment - I find vma_is_shared() to be a bit of a confusing name, > especially given how vma_is_shared_maywrite() is defined. (Sorry if > this has already been discussed before.) > > How about vma_is_shared_pt()? Good point. I'll make this change. Thanks for taking a look. Anthony
On 04.09.24 01:40, James Houghton wrote: > On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote: >> >> From: Khalid Aziz <khalid@kernel.org> >> >> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add >> a function to determine if a vma shares PTEs by checking this flag. >> This is to be used to find the shared page table entries on page fault >> for vmas sharing PTEs. >> >> Signed-off-by: Khalid Aziz <khalid@kernel.org> >> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com> >> --- >> include/linux/mm.h | 7 +++++++ >> include/trace/events/mmflags.h | 3 +++ >> mm/internal.h | 5 +++++ >> 3 files changed, 15 insertions(+) >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index 6549d0979b28..3aa0b3322284 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp); >> #define VM_DROPPABLE VM_NONE >> #endif >> >> +#ifdef CONFIG_64BIT >> +#define VM_SHARED_PT_BIT 41 >> +#define VM_SHARED_PT BIT(VM_SHARED_PT_BIT) >> +#else >> +#define VM_SHARED_PT VM_NONE >> +#endif >> + >> #ifdef CONFIG_64BIT >> /* VM is sealed, in vm_flags */ >> #define VM_SEALED _BITUL(63) >> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h >> index b63d211bd141..e1ae1e60d086 100644 >> --- a/include/trace/events/mmflags.h >> +++ b/include/trace/events/mmflags.h >> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3) >> >> #ifdef CONFIG_64BIT >> # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, >> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name}, >> #else >> # define IF_HAVE_VM_DROPPABLE(flag, name) >> +# define IF_HAVE_VM_SHARED_PT(flag, name) >> #endif >> >> #define __def_vmaflag_names \ >> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ >> {VM_HUGEPAGE, "hugepage" }, \ >> {VM_NOHUGEPAGE, "nohugepage" }, \ >> IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ >> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT, "sharedpt" ) \ >> {VM_MERGEABLE, "mergeable" } \ >> >> #define show_vma_flags(flags) \ >> diff --git a/mm/internal.h b/mm/internal.h >> index b4d86436565b..8005d5956b6e 100644 >> --- a/mm/internal.h >> +++ b/mm/internal.h >> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *); >> void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); >> void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); >> > > Hi Anthony, > > I'm really excited to see this series on the mailing list again! :) I > won't have time to review this series in too much detail, but I hope > something like it gets merged eventually. > >> +static inline bool vma_is_shared(const struct vm_area_struct *vma) >> +{ >> + return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT); >> +} > > Tiny comment - I find vma_is_shared() to be a bit of a confusing name, > especially given how vma_is_shared_maywrite() is defined. (Sorry if > this has already been discussed before.) > > How about vma_is_shared_pt()? vma_is_mshare() ? ;) The whole "shared PT / shared PTE" is a bit misleading IMHO and a bit too dominant in the series. Yes, we're sharing PTEs/page tables, but the main point is that a single mshare VMA might cover multiple different VMAs (in a different process). I would describe mshare VMAs as being something that shares page tables with another MM, BUT, also that the VMA is a container and what exactly the *actual* VMAs in there are (including holes), only the owner knows. E.g., is_vm_hugetlb_page() might be *false* for an mshare VMA, but there might be hugetlb folios mapped into the page tables, described by a is_vm_hugetlb_page() VMA in the owner MM. So again, it's not just "sharing page tables".
On 10/7/24 3:24 AM, David Hildenbrand wrote: > On 04.09.24 01:40, James Houghton wrote: >> On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga >> <anthony.yznaga@oracle.com> wrote: >>> >>> From: Khalid Aziz <khalid@kernel.org> >>> >>> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add >>> a function to determine if a vma shares PTEs by checking this flag. >>> This is to be used to find the shared page table entries on page fault >>> for vmas sharing PTEs. >>> >>> Signed-off-by: Khalid Aziz <khalid@kernel.org> >>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >>> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com> >>> --- >>> include/linux/mm.h | 7 +++++++ >>> include/trace/events/mmflags.h | 3 +++ >>> mm/internal.h | 5 +++++ >>> 3 files changed, 15 insertions(+) >>> >>> diff --git a/include/linux/mm.h b/include/linux/mm.h >>> index 6549d0979b28..3aa0b3322284 100644 >>> --- a/include/linux/mm.h >>> +++ b/include/linux/mm.h >>> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp); >>> #define VM_DROPPABLE VM_NONE >>> #endif >>> >>> +#ifdef CONFIG_64BIT >>> +#define VM_SHARED_PT_BIT 41 >>> +#define VM_SHARED_PT BIT(VM_SHARED_PT_BIT) >>> +#else >>> +#define VM_SHARED_PT VM_NONE >>> +#endif >>> + >>> #ifdef CONFIG_64BIT >>> /* VM is sealed, in vm_flags */ >>> #define VM_SEALED _BITUL(63) >>> diff --git a/include/trace/events/mmflags.h >>> b/include/trace/events/mmflags.h >>> index b63d211bd141..e1ae1e60d086 100644 >>> --- a/include/trace/events/mmflags.h >>> +++ b/include/trace/events/mmflags.h >>> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3) >>> >>> #ifdef CONFIG_64BIT >>> # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, >>> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name}, >>> #else >>> # define IF_HAVE_VM_DROPPABLE(flag, name) >>> +# define IF_HAVE_VM_SHARED_PT(flag, name) >>> #endif >>> >>> #define __def_vmaflag_names \ >>> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, >>> "softdirty" ) \ >>> {VM_HUGEPAGE, "hugepage" }, \ >>> {VM_NOHUGEPAGE, "nohugepage" }, \ >>> IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ >>> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT, "sharedpt" ) \ >>> {VM_MERGEABLE, "mergeable" } \ >>> >>> #define show_vma_flags(flags) \ >>> diff --git a/mm/internal.h b/mm/internal.h >>> index b4d86436565b..8005d5956b6e 100644 >>> --- a/mm/internal.h >>> +++ b/mm/internal.h >>> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct >>> unlink_vma_file_batch *); >>> void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, >>> struct vm_area_struct *); >>> void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); >>> >> >> Hi Anthony, >> >> I'm really excited to see this series on the mailing list again! :) I >> won't have time to review this series in too much detail, but I hope >> something like it gets merged eventually. >> >>> +static inline bool vma_is_shared(const struct vm_area_struct *vma) >>> +{ >>> + return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT); >>> +} >> >> Tiny comment - I find vma_is_shared() to be a bit of a confusing name, >> especially given how vma_is_shared_maywrite() is defined. (Sorry if >> this has already been discussed before.) >> >> How about vma_is_shared_pt()? > > vma_is_mshare() ? ;) vma_is_vmas()? :-D > > The whole "shared PT / shared PTE" is a bit misleading IMHO and a bit > too dominant in the series. Yes, we're sharing PTEs/page tables, but > the main point is that a single mshare VMA might cover multiple > different VMAs (in a different process). > > I would describe mshare VMAs as being something that shares page > tables with another MM, BUT, also that the VMA is a container and what > exactly the *actual* VMAs in there are (including holes), only the > owner knows. > > E.g., is_vm_hugetlb_page() might be *false* for an mshare VMA, but > there might be hugetlb folios mapped into the page tables, described > by a is_vm_hugetlb_page() VMA in the owner MM. > > So again, it's not just "sharing page tables". Understood. I'm okay with something like vma_is_mshare() or some other shorthand for a "container" VMA. And I recognize that I need to identify which code paths need to be enlightened to container VMAs and which should expect to be operating on a real VMA or don't care. Anthony
diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..3aa0b3322284 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_DROPPABLE VM_NONE #endif +#ifdef CONFIG_64BIT +#define VM_SHARED_PT_BIT 41 +#define VM_SHARED_PT BIT(VM_SHARED_PT_BIT) +#else +#define VM_SHARED_PT VM_NONE +#endif + #ifdef CONFIG_64BIT /* VM is sealed, in vm_flags */ #define VM_SEALED _BITUL(63) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..e1ae1e60d086 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3) #ifdef CONFIG_64BIT # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name}, #else # define IF_HAVE_VM_DROPPABLE(flag, name) +# define IF_HAVE_VM_SHARED_PT(flag, name) #endif #define __def_vmaflag_names \ @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_HUGEPAGE, "hugepage" }, \ {VM_NOHUGEPAGE, "nohugepage" }, \ IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT, "sharedpt" ) \ {VM_MERGEABLE, "mergeable" } \ #define show_vma_flags(flags) \ diff --git a/mm/internal.h b/mm/internal.h index b4d86436565b..8005d5956b6e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *); void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); +static inline bool vma_is_shared(const struct vm_area_struct *vma) +{ + return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT); +} + #endif /* __MM_INTERNAL_H */