Message ID | 20240424111017.3160195-3-ryan.roberts@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64/mm: Enable userfaultfd write-protect | expand |
Hi, Ryan, On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: > Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. > > The standard handlers are implemented for set/test/clear for both pte > and pmd. Additionally we must also track the uffd-wp state as a pte swp > bit, so use a free swap entry pte bit (3). > > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Looks all sane here from userfault perspective, just one comment below. > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/pgtable-prot.h | 8 ++++ > arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ > 3 files changed, 64 insertions(+) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 7b11c98b3e84..763e221f2169 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -255,6 +255,7 @@ config ARM64 > select SYSCTL_EXCEPTION_TRACE > select THREAD_INFO_IN_TASK > select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD > + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD > select TRACE_IRQFLAGS_SUPPORT > select TRACE_IRQFLAGS_NMI_SUPPORT > select HAVE_SOFTIRQ_ON_OWN_STACK > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h > index ef952d69fd04..f1e1f6306e03 100644 > --- a/arch/arm64/include/asm/pgtable-prot.h > +++ b/arch/arm64/include/asm/pgtable-prot.h > @@ -20,6 +20,14 @@ > #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) > #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ > > +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP > +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ > +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ > +#else > +#define PTE_UFFD_WP (_AT(pteval_t, 0)) > +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) > +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ > + > /* > * This bit indicates that the entry is present i.e. 
pmd_page() > * still points to a valid huge page in memory even if the pmd > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 23aabff4fa6f..3f4748741fdb 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -271,6 +271,34 @@ static inline pte_t pte_mkdevmap(pte_t pte) > return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); > } > > +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP > +static inline int pte_uffd_wp(pte_t pte) > +{ > + bool wp = !!(pte_val(pte) & PTE_UFFD_WP); > + > +#ifdef CONFIG_DEBUG_VM > + /* > + * Having write bit for wr-protect-marked present ptes is fatal, because > + * it means the uffd-wp bit will be ignored and write will just go > + * through. See comment in x86 implementation. > + */ > + WARN_ON_ONCE(wp && pte_write(pte)); > +#endif Feel free to drop this line, see: https://lore.kernel.org/r/20240417212549.2766883-1-peterx@redhat.com It's still in mm-unstable only. AFAICT ARM64 also is supported by check_page_table, I also checked ARM's ptep_modify_prot_commit() which uses set_pte_at(), so it should cover everything in a superior way already. With that dropped, feel free to add: Acked-by: Peter Xu <peterx@redhat.com> Thanks,
On 24/04/2024 12:57, Peter Xu wrote: > Hi, Ryan, > > On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: >> Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. >> >> The standard handlers are implemented for set/test/clear for both pte >> and pmd. Additionally we must also track the uffd-wp state as a pte swp >> bit, so use a free swap entry pte bit (3). >> >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > > Looks all sane here from userfault perspective, just one comment below. > >> --- >> arch/arm64/Kconfig | 1 + >> arch/arm64/include/asm/pgtable-prot.h | 8 ++++ >> arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ >> 3 files changed, 64 insertions(+) >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 7b11c98b3e84..763e221f2169 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -255,6 +255,7 @@ config ARM64 >> select SYSCTL_EXCEPTION_TRACE >> select THREAD_INFO_IN_TASK >> select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD >> + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD >> select TRACE_IRQFLAGS_SUPPORT >> select TRACE_IRQFLAGS_NMI_SUPPORT >> select HAVE_SOFTIRQ_ON_OWN_STACK >> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h >> index ef952d69fd04..f1e1f6306e03 100644 >> --- a/arch/arm64/include/asm/pgtable-prot.h >> +++ b/arch/arm64/include/asm/pgtable-prot.h >> @@ -20,6 +20,14 @@ >> #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) >> #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ >> >> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP >> +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ >> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ >> +#else >> +#define PTE_UFFD_WP (_AT(pteval_t, 0)) >> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) >> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ >> + >> /* >> * This bit indicates that the entry is present i.e. 
pmd_page() >> * still points to a valid huge page in memory even if the pmd >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index 23aabff4fa6f..3f4748741fdb 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -271,6 +271,34 @@ static inline pte_t pte_mkdevmap(pte_t pte) >> return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); >> } >> >> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP >> +static inline int pte_uffd_wp(pte_t pte) >> +{ >> + bool wp = !!(pte_val(pte) & PTE_UFFD_WP); >> + >> +#ifdef CONFIG_DEBUG_VM >> + /* >> + * Having write bit for wr-protect-marked present ptes is fatal, because >> + * it means the uffd-wp bit will be ignored and write will just go >> + * through. See comment in x86 implementation. >> + */ >> + WARN_ON_ONCE(wp && pte_write(pte)); >> +#endif > > Feel free to drop this line, see: > > https://lore.kernel.org/r/20240417212549.2766883-1-peterx@redhat.com Ahh nice! In that case, I'm going to convert this to a macro, which is the arm64 style for these getters (for some reason...): #define pte_uffd_wp(pte) (!!(pte_val(pte) & PTE_UFFD_WP)) Will send out a v2 once others have had time to comment. > > It's still in mm-unstable only. > > AFAICT ARM64 also is supported by check_page_table, I also checked ARM's > ptep_modify_prot_commit() which uses set_pte_at(), so it should cover > everything in a superior way already. > > With that dropped, feel free to add: > > Acked-by: Peter Xu <peterx@redhat.com> Thanks! > > Thanks, >
On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: > @@ -1248,6 +1302,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, > * Encode and decode a swap entry: > * bits 0-1: present (must be zero) > * bits 2: remember PG_anon_exclusive > + * bit 3: remember uffd-wp state > * bits 4-53: swap offset > * bit 54: PTE_PROT_NONE (overlays PTE_UXN) (must be zero) > * bits 55-59: swap type Ah, I did not realise we need to free up bit 3 from the swap pte as well. Though maybe patch 1 is fine as is but for the record, it would be good to justify the decision to go with PTE_UXN. For this patch: Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+ Muhammad Usama Anjum <usama.anjum@collabora.com> Hi Peter, Muhammad, On 24/04/2024 12:57, Peter Xu wrote: > Hi, Ryan, > > On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: >> Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. >> >> The standard handlers are implemented for set/test/clear for both pte >> and pmd. Additionally we must also track the uffd-wp state as a pte swp >> bit, so use a free swap entry pte bit (3). >> >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > > Looks all sane here from userfault perspective, just one comment below. > >> --- >> arch/arm64/Kconfig | 1 + >> arch/arm64/include/asm/pgtable-prot.h | 8 ++++ >> arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ >> 3 files changed, 64 insertions(+) >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 7b11c98b3e84..763e221f2169 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -255,6 +255,7 @@ config ARM64 >> select SYSCTL_EXCEPTION_TRACE >> select THREAD_INFO_IN_TASK >> select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD >> + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD >> select TRACE_IRQFLAGS_SUPPORT >> select TRACE_IRQFLAGS_NMI_SUPPORT >> select HAVE_SOFTIRQ_ON_OWN_STACK >> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h >> index ef952d69fd04..f1e1f6306e03 100644 >> --- a/arch/arm64/include/asm/pgtable-prot.h >> +++ b/arch/arm64/include/asm/pgtable-prot.h >> @@ -20,6 +20,14 @@ >> #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) >> #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ >> >> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP >> +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ >> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ I've just noticed code in task_mmu.c: static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { ... 
if (!p->arg.category_anyof_mask && !p->arg.category_inverted && p->arg.category_mask == PAGE_IS_WRITTEN && p->arg.return_mask == PAGE_IS_WRITTEN) { for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { unsigned long next = addr + PAGE_SIZE; if (pte_uffd_wp(ptep_get(pte))) <<<<<< continue; ... } } } As far as I can see, you don't know that the pte is present when you do this. So does this imply that the UFFD-WP bit is expected to be in the same position for both present ptes and swap ptes? I had assumed pte_uffd_wp() was for present ptes and pte_swp_uffd_wp() was for swap ptes. As you can see, the way I've implemented this for arm64 the bit is in a different position for these 2 cases. I've just done a slightly different implementation that changes the first patch in this series quite a bit and a bunch of pagemap_ioctl mm kselftests are now failing. I think this is the root cause, but haven't proven it definitively yet. I'm inclined towards thinking the above is a bug and should be fixed so that I can store the bit in different places. What do you think? Thanks, Ryan >> +#else >> +#define PTE_UFFD_WP (_AT(pteval_t, 0)) >> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) >> +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ >> + >> /* >> * This bit indicates that the entry is present i.e. 
pmd_page() >> * still points to a valid huge page in memory even if the pmd >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index 23aabff4fa6f..3f4748741fdb 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -271,6 +271,34 @@ static inline pte_t pte_mkdevmap(pte_t pte) >> return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); >> } >> >> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP >> +static inline int pte_uffd_wp(pte_t pte) >> +{ >> + bool wp = !!(pte_val(pte) & PTE_UFFD_WP); >> + >> +#ifdef CONFIG_DEBUG_VM >> + /* >> + * Having write bit for wr-protect-marked present ptes is fatal, because >> + * it means the uffd-wp bit will be ignored and write will just go >> + * through. See comment in x86 implementation. >> + */ >> + WARN_ON_ONCE(wp && pte_write(pte)); >> +#endif > > Feel free to drop this line, see: > > https://lore.kernel.org/r/20240417212549.2766883-1-peterx@redhat.com > > It's still in mm-unstable only. > > AFAICT ARM64 also is supported by check_page_table, I also checked ARM's > ptep_modify_prot_commit() which uses set_pte_at(), so it should cover > everything in a superior way already. > > With that dropped, feel free to add: > > Acked-by: Peter Xu <peterx@redhat.com> > > Thanks, >
On Fri, Apr 26, 2024 at 02:17:41PM +0100, Ryan Roberts wrote: > + Muhammad Usama Anjum <usama.anjum@collabora.com> > > Hi Peter, Muhammad, > > > On 24/04/2024 12:57, Peter Xu wrote: > > Hi, Ryan, > > > > On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: > >> Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. > >> > >> The standard handlers are implemented for set/test/clear for both pte > >> and pmd. Additionally we must also track the uffd-wp state as a pte swp > >> bit, so use a free swap entry pte bit (3). > >> > >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > > > > Looks all sane here from userfault perspective, just one comment below. > > > >> --- > >> arch/arm64/Kconfig | 1 + > >> arch/arm64/include/asm/pgtable-prot.h | 8 ++++ > >> arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ > >> 3 files changed, 64 insertions(+) > >> > >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > >> index 7b11c98b3e84..763e221f2169 100644 > >> --- a/arch/arm64/Kconfig > >> +++ b/arch/arm64/Kconfig > >> @@ -255,6 +255,7 @@ config ARM64 > >> select SYSCTL_EXCEPTION_TRACE > >> select THREAD_INFO_IN_TASK > >> select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD > >> + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD > >> select TRACE_IRQFLAGS_SUPPORT > >> select TRACE_IRQFLAGS_NMI_SUPPORT > >> select HAVE_SOFTIRQ_ON_OWN_STACK > >> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h > >> index ef952d69fd04..f1e1f6306e03 100644 > >> --- a/arch/arm64/include/asm/pgtable-prot.h > >> +++ b/arch/arm64/include/asm/pgtable-prot.h > >> @@ -20,6 +20,14 @@ > >> #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) > >> #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ > >> > >> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP > >> +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ > >> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ > > I've just noticed 
code in task_mmu.c: > > static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, > unsigned long end, struct mm_walk *walk) > { > ... > > if (!p->arg.category_anyof_mask && !p->arg.category_inverted && > p->arg.category_mask == PAGE_IS_WRITTEN && > p->arg.return_mask == PAGE_IS_WRITTEN) { > for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { > unsigned long next = addr + PAGE_SIZE; > > if (pte_uffd_wp(ptep_get(pte))) <<<<<< > continue; > > ... > } > } > } > > As far as I can see, you don't know that the pte is present when you do this. So > does this imply that the UFFD-WP bit is expected to be in the same position for > both present ptes and swap ptes? I had assumed pte_uffd_wp() was for present > ptes and pte_swp_uffd_wp() was for swap ptes. > > As you can see, the way I've implemented this for arm64 the bit is in a > different position for these 2 cases. I've just done a slightly different > implementation that changes the first patch in this series quite a bit and a > bunch of pagemap_ioctl mm kselftests are now failing. I think this is the root > cause, but haven't proven it definitively yet. > > I'm inclined towards thinking the above is a bug and should be fixed so that I > can store the bit in different places. What do you think? Yep I agree. Even on x86_64 they should be defined differently. It looks like sheer luck that the test consistently passes on x86 even though it checks the wrong one. Worth checking all the relevant paths in the pagemap code to make sure it's checked, e.g. I also see one fast path above this chunk of code which looks to have the same issue. Thanks,
On 26/04/2024 14:54, Peter Xu wrote: > On Fri, Apr 26, 2024 at 02:17:41PM +0100, Ryan Roberts wrote: >> + Muhammad Usama Anjum <usama.anjum@collabora.com> >> >> Hi Peter, Muhammad, >> >> >> On 24/04/2024 12:57, Peter Xu wrote: >>> Hi, Ryan, >>> >>> On Wed, Apr 24, 2024 at 12:10:17PM +0100, Ryan Roberts wrote: >>>> Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. >>>> >>>> The standard handlers are implemented for set/test/clear for both pte >>>> and pmd. Additionally we must also track the uffd-wp state as a pte swp >>>> bit, so use a free swap entry pte bit (3). >>>> >>>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> >>> >>> Looks all sane here from userfault perspective, just one comment below. >>> >>>> --- >>>> arch/arm64/Kconfig | 1 + >>>> arch/arm64/include/asm/pgtable-prot.h | 8 ++++ >>>> arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ >>>> 3 files changed, 64 insertions(+) >>>> >>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>>> index 7b11c98b3e84..763e221f2169 100644 >>>> --- a/arch/arm64/Kconfig >>>> +++ b/arch/arm64/Kconfig >>>> @@ -255,6 +255,7 @@ config ARM64 >>>> select SYSCTL_EXCEPTION_TRACE >>>> select THREAD_INFO_IN_TASK >>>> select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD >>>> + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD >>>> select TRACE_IRQFLAGS_SUPPORT >>>> select TRACE_IRQFLAGS_NMI_SUPPORT >>>> select HAVE_SOFTIRQ_ON_OWN_STACK >>>> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h >>>> index ef952d69fd04..f1e1f6306e03 100644 >>>> --- a/arch/arm64/include/asm/pgtable-prot.h >>>> +++ b/arch/arm64/include/asm/pgtable-prot.h >>>> @@ -20,6 +20,14 @@ >>>> #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) >>>> #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ >>>> >>>> +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP >>>> +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ >>>> +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) 
/* only for swp ptes */ >> >> I've just noticed code in task_mmu.c: >> >> static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, >> unsigned long end, struct mm_walk *walk) >> { >> ... >> >> if (!p->arg.category_anyof_mask && !p->arg.category_inverted && >> p->arg.category_mask == PAGE_IS_WRITTEN && >> p->arg.return_mask == PAGE_IS_WRITTEN) { >> for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { >> unsigned long next = addr + PAGE_SIZE; >> >> if (pte_uffd_wp(ptep_get(pte))) <<<<<< >> continue; >> >> ... >> } >> } >> } >> >> As far as I can see, you don't know that the pte is present when you do this. So >> does this imply that the UFFD-WP bit is expected to be in the same position for >> both present ptes and swap ptes? I had assumed pte_uffd_wp() was for present >> ptes and pte_swp_uffd_wp() was for swap ptes. >> >> As you can see, the way I've implemented this for arm64 the bit is in a >> different position for these 2 cases. I've just done a slightly different >> implementation that changes the first patch in this series quite a bit and a >> bunch of pagemap_ioctl mm kselftests are now failing. I think this is the root >> cause, but haven't proven it definitively yet. >> >> I'm inclined towards thinking the above is a bug and should be fixed so that I >> can store the bit in different places. What do you think? > > Yep I agree. OK great - I'll spin a patch to fix this. > > Even on x86_64 they should be defined differently. It looks like > sheer luck that the test consistently passes on x86 even though it checks the wrong one. > > Worth checking all the relevant paths in the pagemap code to make sure it's > checked, e.g. I also see one fast path above this chunk of code which looks > to have the same issue. Yes, spotted that one. I'll audit other sites too. Thanks! > > Thanks, >
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b11c98b3e84..763e221f2169 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -255,6 +255,7 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index ef952d69fd04..f1e1f6306e03 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -20,6 +20,14 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (PTE_UXN) /* Reuse PTE_UXN; only when !PTE_VALID */ +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ +#else +#define PTE_UFFD_WP (_AT(pteval_t, 0)) +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + /* * This bit indicates that the entry is present i.e. pmd_page() * still points to a valid huge page in memory even if the pmd diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 23aabff4fa6f..3f4748741fdb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -271,6 +271,34 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + bool wp = !!(pte_val(pte) & PTE_UFFD_WP); + +#ifdef CONFIG_DEBUG_VM + /* + * Having write bit for wr-protect-marked present ptes is fatal, because + * it means the uffd-wp bit will be ignored and write will just go + * through. See comment in x86 implementation. 
+ */ + WARN_ON_ONCE(wp && pte_write(pte)); +#endif + + return wp; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP))); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline void __set_pte(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); @@ -463,6 +491,23 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h @@ -508,6 +553,15 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) +#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) +#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) +#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) +#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) +#define pmd_swp_clear_uffd_wp(pmd) \ + pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pmd_t pmd_mkinvalid(pmd_t pmd) { @@ -1248,6 +1302,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * Encode and decode a swap entry: * bits 0-1: present (must be zero) * bits 
2: remember PG_anon_exclusive + * bit 3: remember uffd-wp state * bits 4-53: swap offset * bit 54: PTE_PROT_NONE (overlays PTE_UXN) (must be zero) * bits 55-59: swap type
Let's use the newly-free PTE SW bit (58) to add support for uffd-wp. The standard handlers are implemented for set/test/clear for both pte and pmd. Additionally we must also track the uffd-wp state as a pte swp bit, so use a free swap entry pte bit (3). Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable-prot.h | 8 ++++ arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+)