diff mbox series

[RFC,v3,06/10] mm/mshare: Add vm flag for shared PTEs

Message ID 20240903232241.43995-7-anthony.yznaga@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Anthony Yznaga Sept. 3, 2024, 11:22 p.m. UTC
From: Khalid Aziz <khalid@kernel.org>

Add a bit to vm_flags to indicate a vma shares PTEs with others. Add
a function to determine if a vma shares PTEs by checking this flag.
This is to be used to find the shared page table entries on page fault
for vmas sharing PTEs.

Signed-off-by: Khalid Aziz <khalid@kernel.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 include/linux/mm.h             | 7 +++++++
 include/trace/events/mmflags.h | 3 +++
 mm/internal.h                  | 5 +++++
 3 files changed, 15 insertions(+)

Comments

James Houghton Sept. 3, 2024, 11:40 p.m. UTC | #1
On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote:
>
> From: Khalid Aziz <khalid@kernel.org>
>
> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add
> a function to determine if a vma shares PTEs by checking this flag.
> This is to be used to find the shared page table entries on page fault
> for vmas sharing PTEs.
>
> Signed-off-by: Khalid Aziz <khalid@kernel.org>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
> ---
>  include/linux/mm.h             | 7 +++++++
>  include/trace/events/mmflags.h | 3 +++
>  mm/internal.h                  | 5 +++++
>  3 files changed, 15 insertions(+)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 6549d0979b28..3aa0b3322284 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp);
>  #define VM_DROPPABLE           VM_NONE
>  #endif
>
> +#ifdef CONFIG_64BIT
> +#define VM_SHARED_PT_BIT       41
> +#define VM_SHARED_PT           BIT(VM_SHARED_PT_BIT)
> +#else
> +#define VM_SHARED_PT           VM_NONE
> +#endif
> +
>  #ifdef CONFIG_64BIT
>  /* VM is sealed, in vm_flags */
>  #define VM_SEALED      _BITUL(63)
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index b63d211bd141..e1ae1e60d086 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3)
>
>  #ifdef CONFIG_64BIT
>  # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name},
>  #else
>  # define IF_HAVE_VM_DROPPABLE(flag, name)
> +# define IF_HAVE_VM_SHARED_PT(flag, name)
>  #endif
>
>  #define __def_vmaflag_names                                            \
> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,  "softdirty"     )               \
>         {VM_HUGEPAGE,                   "hugepage"      },              \
>         {VM_NOHUGEPAGE,                 "nohugepage"    },              \
>  IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,     "droppable"     )               \
> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT,     "sharedpt"      )               \
>         {VM_MERGEABLE,                  "mergeable"     }               \
>
>  #define show_vma_flags(flags)                                          \
> diff --git a/mm/internal.h b/mm/internal.h
> index b4d86436565b..8005d5956b6e 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
>  void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
>  void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
>

Hi Anthony,

I'm really excited to see this series on the mailing list again! :) I
won't have time to review this series in too much detail, but I hope
something like it gets merged eventually.

> +static inline bool vma_is_shared(const struct vm_area_struct *vma)
> +{
> +       return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT);
> +}

Tiny comment - I find vma_is_shared() to be a bit of a confusing name,
especially given how vma_is_shared_maywrite() is defined. (Sorry if
this has already been discussed before.)

How about vma_is_shared_pt()?
Anthony Yznaga Sept. 3, 2024, 11:58 p.m. UTC | #2
On 9/3/24 4:40 PM, James Houghton wrote:
> On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote:
>> From: Khalid Aziz <khalid@kernel.org>
>>
>> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add
>> a function to determine if a vma shares PTEs by checking this flag.
>> This is to be used to find the shared page table entries on page fault
>> for vmas sharing PTEs.
>>
>> Signed-off-by: Khalid Aziz <khalid@kernel.org>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
>> ---
>>   include/linux/mm.h             | 7 +++++++
>>   include/trace/events/mmflags.h | 3 +++
>>   mm/internal.h                  | 5 +++++
>>   3 files changed, 15 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 6549d0979b28..3aa0b3322284 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp);
>>   #define VM_DROPPABLE           VM_NONE
>>   #endif
>>
>> +#ifdef CONFIG_64BIT
>> +#define VM_SHARED_PT_BIT       41
>> +#define VM_SHARED_PT           BIT(VM_SHARED_PT_BIT)
>> +#else
>> +#define VM_SHARED_PT           VM_NONE
>> +#endif
>> +
>>   #ifdef CONFIG_64BIT
>>   /* VM is sealed, in vm_flags */
>>   #define VM_SEALED      _BITUL(63)
>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>> index b63d211bd141..e1ae1e60d086 100644
>> --- a/include/trace/events/mmflags.h
>> +++ b/include/trace/events/mmflags.h
>> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3)
>>
>>   #ifdef CONFIG_64BIT
>>   # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
>> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name},
>>   #else
>>   # define IF_HAVE_VM_DROPPABLE(flag, name)
>> +# define IF_HAVE_VM_SHARED_PT(flag, name)
>>   #endif
>>
>>   #define __def_vmaflag_names                                            \
>> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,  "softdirty"     )               \
>>          {VM_HUGEPAGE,                   "hugepage"      },              \
>>          {VM_NOHUGEPAGE,                 "nohugepage"    },              \
>>   IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,     "droppable"     )               \
>> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT,     "sharedpt"      )               \
>>          {VM_MERGEABLE,                  "mergeable"     }               \
>>
>>   #define show_vma_flags(flags)                                          \
>> diff --git a/mm/internal.h b/mm/internal.h
>> index b4d86436565b..8005d5956b6e 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
>>   void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
>>   void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
>>
Hi James,
> Hi Anthony,
>
> I'm really excited to see this series on the mailing list again! :) I
> won't have time to review this series in too much detail, but I hope
> something like it gets merged eventually.
Me, too. :-)
>
>> +static inline bool vma_is_shared(const struct vm_area_struct *vma)
>> +{
>> +       return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT);
>> +}
> Tiny comment - I find vma_is_shared() to be a bit of a confusing name,
> especially given how vma_is_shared_maywrite() is defined. (Sorry if
> this has already been discussed before.)
>
> How about vma_is_shared_pt()?

Good point. I'll make this change. Thanks for taking a look.


Anthony
David Hildenbrand Oct. 7, 2024, 10:24 a.m. UTC | #3
On 04.09.24 01:40, James Houghton wrote:
> On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga <anthony.yznaga@oracle.com> wrote:
>>
>> From: Khalid Aziz <khalid@kernel.org>
>>
>> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add
>> a function to determine if a vma shares PTEs by checking this flag.
>> This is to be used to find the shared page table entries on page fault
>> for vmas sharing PTEs.
>>
>> Signed-off-by: Khalid Aziz <khalid@kernel.org>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
>> ---
>>   include/linux/mm.h             | 7 +++++++
>>   include/trace/events/mmflags.h | 3 +++
>>   mm/internal.h                  | 5 +++++
>>   3 files changed, 15 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 6549d0979b28..3aa0b3322284 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp);
>>   #define VM_DROPPABLE           VM_NONE
>>   #endif
>>
>> +#ifdef CONFIG_64BIT
>> +#define VM_SHARED_PT_BIT       41
>> +#define VM_SHARED_PT           BIT(VM_SHARED_PT_BIT)
>> +#else
>> +#define VM_SHARED_PT           VM_NONE
>> +#endif
>> +
>>   #ifdef CONFIG_64BIT
>>   /* VM is sealed, in vm_flags */
>>   #define VM_SEALED      _BITUL(63)
>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>> index b63d211bd141..e1ae1e60d086 100644
>> --- a/include/trace/events/mmflags.h
>> +++ b/include/trace/events/mmflags.h
>> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3)
>>
>>   #ifdef CONFIG_64BIT
>>   # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
>> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name},
>>   #else
>>   # define IF_HAVE_VM_DROPPABLE(flag, name)
>> +# define IF_HAVE_VM_SHARED_PT(flag, name)
>>   #endif
>>
>>   #define __def_vmaflag_names                                            \
>> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,  "softdirty"     )               \
>>          {VM_HUGEPAGE,                   "hugepage"      },              \
>>          {VM_NOHUGEPAGE,                 "nohugepage"    },              \
>>   IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,     "droppable"     )               \
>> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT,     "sharedpt"      )               \
>>          {VM_MERGEABLE,                  "mergeable"     }               \
>>
>>   #define show_vma_flags(flags)                                          \
>> diff --git a/mm/internal.h b/mm/internal.h
>> index b4d86436565b..8005d5956b6e 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
>>   void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
>>   void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
>>
> 
> Hi Anthony,
> 
> I'm really excited to see this series on the mailing list again! :) I
> won't have time to review this series in too much detail, but I hope
> something like it gets merged eventually.
> 
>> +static inline bool vma_is_shared(const struct vm_area_struct *vma)
>> +{
>> +       return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT);
>> +}
> 
> Tiny comment - I find vma_is_shared() to be a bit of a confusing name,
> especially given how vma_is_shared_maywrite() is defined. (Sorry if
> this has already been discussed before.)
> 
> How about vma_is_shared_pt()?

vma_is_mshare() ? ;)

The whole "shared PT / shared PTE" is a bit misleading IMHO and a bit 
too dominant in the series. Yes, we're sharing PTEs/page tables, but the 
main point is that a single mshare VMA might cover multiple different 
VMAs (in a different process).

I would describe mshare VMAs as being something that shares page tables 
with another MM, BUT, also that the VMA is a container and what exactly 
the *actual* VMAs in there are (including holes), only the owner knows.

E.g., is_vm_hugetlb_page() might be *false* for an mshare VMA, but there 
might be hugetlb folios mapped into the page tables, described by a 
is_vm_hugetlb_page() VMA in the owner MM.

So again, it's not just "sharing page tables".
Anthony Yznaga Oct. 7, 2024, 11:03 p.m. UTC | #4
On 10/7/24 3:24 AM, David Hildenbrand wrote:
> On 04.09.24 01:40, James Houghton wrote:
>> On Tue, Sep 3, 2024 at 4:23 PM Anthony Yznaga 
>> <anthony.yznaga@oracle.com> wrote:
>>>
>>> From: Khalid Aziz <khalid@kernel.org>
>>>
>>> Add a bit to vm_flags to indicate a vma shares PTEs with others. Add
>>> a function to determine if a vma shares PTEs by checking this flag.
>>> This is to be used to find the shared page table entries on page fault
>>> for vmas sharing PTEs.
>>>
>>> Signed-off-by: Khalid Aziz <khalid@kernel.org>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>> Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
>>> ---
>>>   include/linux/mm.h             | 7 +++++++
>>>   include/trace/events/mmflags.h | 3 +++
>>>   mm/internal.h                  | 5 +++++
>>>   3 files changed, 15 insertions(+)
>>>
>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>> index 6549d0979b28..3aa0b3322284 100644
>>> --- a/include/linux/mm.h
>>> +++ b/include/linux/mm.h
>>> @@ -413,6 +413,13 @@ extern unsigned int kobjsize(const void *objp);
>>>   #define VM_DROPPABLE           VM_NONE
>>>   #endif
>>>
>>> +#ifdef CONFIG_64BIT
>>> +#define VM_SHARED_PT_BIT       41
>>> +#define VM_SHARED_PT           BIT(VM_SHARED_PT_BIT)
>>> +#else
>>> +#define VM_SHARED_PT           VM_NONE
>>> +#endif
>>> +
>>>   #ifdef CONFIG_64BIT
>>>   /* VM is sealed, in vm_flags */
>>>   #define VM_SEALED      _BITUL(63)
>>> diff --git a/include/trace/events/mmflags.h 
>>> b/include/trace/events/mmflags.h
>>> index b63d211bd141..e1ae1e60d086 100644
>>> --- a/include/trace/events/mmflags.h
>>> +++ b/include/trace/events/mmflags.h
>>> @@ -167,8 +167,10 @@ IF_HAVE_PG_ARCH_X(arch_3)
>>>
>>>   #ifdef CONFIG_64BIT
>>>   # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
>>> +# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name},
>>>   #else
>>>   # define IF_HAVE_VM_DROPPABLE(flag, name)
>>> +# define IF_HAVE_VM_SHARED_PT(flag, name)
>>>   #endif
>>>
>>>   #define __def_vmaflag_names \
>>> @@ -204,6 +206,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, 
>>> "softdirty"     )               \
>>>          {VM_HUGEPAGE,                   "hugepage" },              \
>>>          {VM_NOHUGEPAGE,                 "nohugepage" },              \
>>>   IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,     "droppable" )               \
>>> +IF_HAVE_VM_SHARED_PT(VM_SHARED_PT,     "sharedpt" )               \
>>>          {VM_MERGEABLE,                  "mergeable" }               \
>>>
>>>   #define show_vma_flags(flags) \
>>> diff --git a/mm/internal.h b/mm/internal.h
>>> index b4d86436565b..8005d5956b6e 100644
>>> --- a/mm/internal.h
>>> +++ b/mm/internal.h
>>> @@ -1578,4 +1578,9 @@ void unlink_file_vma_batch_init(struct 
>>> unlink_vma_file_batch *);
>>>   void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, 
>>> struct vm_area_struct *);
>>>   void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
>>>
>>
>> Hi Anthony,
>>
>> I'm really excited to see this series on the mailing list again! :) I
>> won't have time to review this series in too much detail, but I hope
>> something like it gets merged eventually.
>>
>>> +static inline bool vma_is_shared(const struct vm_area_struct *vma)
>>> +{
>>> +       return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT);
>>> +}
>>
>> Tiny comment - I find vma_is_shared() to be a bit of a confusing name,
>> especially given how vma_is_shared_maywrite() is defined. (Sorry if
>> this has already been discussed before.)
>>
>> How about vma_is_shared_pt()?
>
> vma_is_mshare() ? ;)

vma_is_vmas()? :-D


>
> The whole "shared PT / shared PTE" is a bit misleading IMHO and a bit 
> too dominant in the series. Yes, we're sharing PTEs/page tables, but 
> the main point is that a single mshare VMA might cover multiple 
> different VMAs (in a different process).
>
> I would describe mshare VMAs as being something that shares page 
> tables with another MM, BUT, also that the VMA is a container and what 
> exactly the *actual* VMAs in there are (including holes), only the 
> owner knows.
>
> E.g., is_vm_hugetlb_page() might be *false* for an mshare VMA, but 
> there might be hugetlb folios mapped into the page tables, described 
> by a is_vm_hugetlb_page() VMA in the owner MM.
>
> So again, it's not just "sharing page tables".

Understood. I'm okay with something like vma_is_mshare() or some other 
shorthand for a "container" VMA. And I recognize that I need to identify 
which code paths need to be enlightened to container VMAs and which 
should expect to be operating on a real VMA or don't care.


Anthony
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6549d0979b28..3aa0b3322284 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -413,6 +413,13 @@  extern unsigned int kobjsize(const void *objp);
 #define VM_DROPPABLE		VM_NONE
 #endif
 
+#ifdef CONFIG_64BIT
+#define VM_SHARED_PT_BIT	41
+#define VM_SHARED_PT		BIT(VM_SHARED_PT_BIT)
+#else
+#define VM_SHARED_PT		VM_NONE
+#endif
+
 #ifdef CONFIG_64BIT
 /* VM is sealed, in vm_flags */
 #define VM_SEALED	_BITUL(63)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index b63d211bd141..e1ae1e60d086 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -167,8 +167,10 @@  IF_HAVE_PG_ARCH_X(arch_3)
 
 #ifdef CONFIG_64BIT
 # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
+# define IF_HAVE_VM_SHARED_PT(flag, name) {flag, name},
 #else
 # define IF_HAVE_VM_DROPPABLE(flag, name)
+# define IF_HAVE_VM_SHARED_PT(flag, name)
 #endif
 
 #define __def_vmaflag_names						\
@@ -204,6 +206,7 @@  IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 	{VM_HUGEPAGE,			"hugepage"	},		\
 	{VM_NOHUGEPAGE,			"nohugepage"	},		\
 IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,	"droppable"	)		\
+IF_HAVE_VM_SHARED_PT(VM_SHARED_PT,	"sharedpt"	)		\
 	{VM_MERGEABLE,			"mergeable"	}		\
 
 #define show_vma_flags(flags)						\
diff --git a/mm/internal.h b/mm/internal.h
index b4d86436565b..8005d5956b6e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1578,4 +1578,9 @@  void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
 void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
 void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);
 
+static inline bool vma_is_shared(const struct vm_area_struct *vma)
+{
+	return VM_SHARED_PT && (vma->vm_flags & VM_SHARED_PT);
+}
+
 #endif	/* __MM_INTERNAL_H */