
[RFC,1/4] mm: Introduce lazy exec permission setting on a page

Message ID 1550045191-27483-2-git-send-email-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Series mm: Introduce lazy exec permission setting on a page

Commit Message

Anshuman Khandual Feb. 13, 2019, 8:06 a.m. UTC
Setting exec permission on a page normally triggers I-cache invalidation,
which can be expensive. I-cache invalidation is not mandatory for a given
page if there is no immediate exec access to it. Non-fault modification of
user page tables from generic memory paths like migration can be improved
if setting the exec permission on the page is deferred until actual use.

This introduces [pte|pmd]_mklazyexec(), which clears the exec permission
on a page during migration. The deferred exec permission must then be
restored with maybe_[pmd_]mkexec() during an exec page fault
(FAULT_FLAG_INSTRUCTION) if the corresponding VMA carries the exec flag
(VM_EXEC).

This framework is encapsulated under CONFIG_ARCH_SUPPORTS_LAZY_EXEC so
that non-subscribing architectures do not take any performance hit. For
now only the generic memory migration path uses this framework, but it can
later be extended to other generic memory paths as well.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 include/asm-generic/pgtable.h | 12 ++++++++++++
 include/linux/mm.h            | 26 ++++++++++++++++++++++++++
 mm/Kconfig                    |  9 +++++++++
 mm/huge_memory.c              |  5 +++++
 mm/hugetlb.c                  |  2 ++
 mm/memory.c                   |  4 ++++
 mm/migrate.c                  |  2 ++
 7 files changed, 60 insertions(+)
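
A subscribing architecture is expected to select ARCH_SUPPORTS_LAZY_EXEC,
provide its own [pte|pmd]_mklazyexec() helpers, and (per the Kconfig help
text) allow a non-exec to exec transition in ptep_set_access_flags(). A
minimal arch-side sketch follows; ARCH_PTE_NOEXEC and ARCH_PMD_NOEXEC are
placeholder bit names, not identifiers taken from this series.

/*
 * Hypothetical helpers on a subscribing architecture. The NOEXEC bit
 * names are placeholders for whatever the architecture actually uses.
 */
static inline pte_t pte_mklazyexec(pte_t pte)
{
	/* Drop exec permission; it is restored lazily on an exec fault */
	return __pte(pte_val(pte) | ARCH_PTE_NOEXEC);
}

static inline pmd_t pmd_mklazyexec(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | ARCH_PMD_NOEXEC);
}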

Comments

Matthew Wilcox Feb. 13, 2019, 1:17 p.m. UTC | #1
On Wed, Feb 13, 2019 at 01:36:28PM +0530, Anshuman Khandual wrote:
> +#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> +{
> +	if (unlikely(vma->vm_flags & VM_EXEC))
> +		return pte_mkexec(entry);
> +	return entry;
> +}
> +#else
> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> +{
> +	return entry;
> +}
> +#endif

> +++ b/mm/memory.c
> @@ -2218,6 +2218,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  	entry = pte_mkyoung(vmf->orig_pte);
>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
> +		entry = maybe_mkexec(entry, vma);

I don't understand this bit.  We have a fault based on an instruction
fetch.  But we're only going to _maybe_ set the exec bit?  Why not call
pte_mkexec() unconditionally?
Anshuman Khandual Feb. 13, 2019, 1:53 p.m. UTC | #2
On 02/13/2019 06:47 PM, Matthew Wilcox wrote:
> On Wed, Feb 13, 2019 at 01:36:28PM +0530, Anshuman Khandual wrote:
>> +#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
>> +{
>> +	if (unlikely(vma->vm_flags & VM_EXEC))
>> +		return pte_mkexec(entry);
>> +	return entry;
>> +}
>> +#else
>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
>> +{
>> +	return entry;
>> +}
>> +#endif
> 
>> +++ b/mm/memory.c
>> @@ -2218,6 +2218,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>  	entry = pte_mkyoung(vmf->orig_pte);
>>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>> +	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
>> +		entry = maybe_mkexec(entry, vma);
> 
> I don't understand this bit.  We have a fault based on an instruction
> fetch.  But we're only going to _maybe_ set the exec bit?  Why not call
> pte_mkexec() unconditionally?

Because the arch might not have subscribed to this, in which case the
fallback function does nothing and returns the same entry. But when this is
enabled it also checks the VMA exec flag (VM_EXEC) before calling into
pte_mkexec(), similar to the existing maybe_mkwrite().
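
For reference, the existing maybe_mkwrite() helper in include/linux/mm.h
that this mirrors looks roughly like:

static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (likely(vma->vm_flags & VM_WRITE))
		pte = pte_mkwrite(pte);
	return pte;
}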
Mike Rapoport Feb. 14, 2019, 9:06 a.m. UTC | #3
On Wed, Feb 13, 2019 at 07:23:18PM +0530, Anshuman Khandual wrote:
> 
> 
> On 02/13/2019 06:47 PM, Matthew Wilcox wrote:
> > On Wed, Feb 13, 2019 at 01:36:28PM +0530, Anshuman Khandual wrote:
> >> +#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
> >> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> >> +{
> >> +	if (unlikely(vma->vm_flags & VM_EXEC))
> >> +		return pte_mkexec(entry);
> >> +	return entry;
> >> +}
> >> +#else
> >> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> >> +{
> >> +	return entry;
> >> +}
> >> +#endif
> > 
> >> +++ b/mm/memory.c
> >> @@ -2218,6 +2218,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
> >>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> >>  	entry = pte_mkyoung(vmf->orig_pte);
> >>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> >> +	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
> >> +		entry = maybe_mkexec(entry, vma);
> > 
> > I don't understand this bit.  We have a fault based on an instruction
> > fetch.  But we're only going to _maybe_ set the exec bit?  Why not call
> > pte_mkexec() unconditionally?
> 
> Because the arch might not have subscribed to this, in which case the
> fallback function does nothing and returns the same entry. But when this is
> enabled it also checks the VMA exec flag (VM_EXEC) before calling into
> pte_mkexec(), similar to the existing maybe_mkwrite().

Then why not pass vmf->flags to maybe_mkexec() so that only arches
subscribed to this will have the check for 'flags & FAULT_FLAG_INSTRUCTION'?
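
A sketch of that alternative, purely to illustrate the suggestion (not
code from the posted series), could look like:

#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma,
				 unsigned int flags)
{
	if (unlikely((flags & FAULT_FLAG_INSTRUCTION) &&
		     (vma->vm_flags & VM_EXEC)))
		return pte_mkexec(entry);
	return entry;
}
#else
static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma,
				 unsigned int flags)
{
	return entry;
}
#endif

/*
 * A call site such as wp_page_reuse() would then read:
 *	entry = maybe_mkexec(entry, vma, vmf->flags);
 */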
Anshuman Khandual Feb. 15, 2019, 8:11 a.m. UTC | #4
On 02/14/2019 02:36 PM, Mike Rapoport wrote:
> On Wed, Feb 13, 2019 at 07:23:18PM +0530, Anshuman Khandual wrote:
>>
>>
>> On 02/13/2019 06:47 PM, Matthew Wilcox wrote:
>>> On Wed, Feb 13, 2019 at 01:36:28PM +0530, Anshuman Khandual wrote:
>>>> +#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
>>>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
>>>> +{
>>>> +	if (unlikely(vma->vm_flags & VM_EXEC))
>>>> +		return pte_mkexec(entry);
>>>> +	return entry;
>>>> +}
>>>> +#else
>>>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
>>>> +{
>>>> +	return entry;
>>>> +}
>>>> +#endif
>>>
>>>> +++ b/mm/memory.c
>>>> @@ -2218,6 +2218,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>>>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>>>  	entry = pte_mkyoung(vmf->orig_pte);
>>>>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>>> +	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
>>>> +		entry = maybe_mkexec(entry, vma);
>>>
>>> I don't understand this bit.  We have a fault based on an instruction
>>> fetch.  But we're only going to _maybe_ set the exec bit?  Why not call
>>> pte_mkexec() unconditionally?
>>
>> Because the arch might not have subscribed to this, in which case the
>> fallback function does nothing and returns the same entry. But when this is
>> enabled it also checks the VMA exec flag (VM_EXEC) before calling into
>> pte_mkexec(), similar to the existing maybe_mkwrite().
> 
> Then why not pass vmf->flags to maybe_mkexec() so that only arches
> subscribed to this will have the check for 'flags & FAULT_FLAG_INSTRUCTION'?

Right, it can help remove a couple of instructions from non-subscribing archs.
Catalin Marinas Feb. 15, 2019, 9:49 a.m. UTC | #5
On Fri, Feb 15, 2019 at 01:41:16PM +0530, Anshuman Khandual wrote:
> On 02/14/2019 02:36 PM, Mike Rapoport wrote:
> > On Wed, Feb 13, 2019 at 07:23:18PM +0530, Anshuman Khandual wrote:
> >> On 02/13/2019 06:47 PM, Matthew Wilcox wrote:
> >>> On Wed, Feb 13, 2019 at 01:36:28PM +0530, Anshuman Khandual wrote:
> >>>> +#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
> >>>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> >>>> +{
> >>>> +	if (unlikely(vma->vm_flags & VM_EXEC))
> >>>> +		return pte_mkexec(entry);
> >>>> +	return entry;
> >>>> +}
> >>>> +#else
> >>>> +static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
> >>>> +{
> >>>> +	return entry;
> >>>> +}
> >>>> +#endif
> >>>
> >>>> +++ b/mm/memory.c
> >>>> @@ -2218,6 +2218,8 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
> >>>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> >>>>  	entry = pte_mkyoung(vmf->orig_pte);
> >>>>  	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> >>>> +	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
> >>>> +		entry = maybe_mkexec(entry, vma);
> >>>
> >>> I don't understand this bit.  We have a fault based on an instruction
> >>> fetch.  But we're only going to _maybe_ set the exec bit?  Why not call
> >>> pte_mkexec() unconditionally?
> >>
> >> Because the arch might not have subscribed to this, in which case the
> >> fallback function does nothing and returns the same entry. But when this is
> >> enabled it also checks the VMA exec flag (VM_EXEC) before calling into
> >> pte_mkexec(), similar to the existing maybe_mkwrite().
> > 
> > Then why not pass vmf->flags to maybe_mkexec() so that only arches
> > subscribed to this will have the check for 'flags & FAULT_FLAG_INSTRUCTION'?
> 
> Right, it can help remove a couple of instructions from non-subscribing archs.

If the arch does not enable CONFIG_ARCH_SUPPORTS_LAZY_EXEC, wouldn't the
compiler eliminate the FAULT_FLAG_INSTRUCTION check anyway? The current
maybe_mkexec() proposal here looks slightly nicer as it matches the
maybe_mkwrite() prototype.
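
Concretely, with the !CONFIG_ARCH_SUPPORTS_LAZY_EXEC stub returning its
argument unchanged, a call site such as wp_page_reuse() reduces to a
self-assignment once the helper is inlined (illustrative comments added):

	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
		entry = maybe_mkexec(entry, vma);	/* stub: entry = entry */
	/* dead store and dead branch, so the test can be optimised away */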

Patch

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 05e61e6..d35d129 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -26,6 +26,18 @@ 
 #define USER_PGTABLES_CEILING	0UL
 #endif
 
+#ifndef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
+static inline pte_t pte_mklazyexec(pte_t entry)
+{
+	return entry;
+}
+
+static inline pmd_t pmd_mklazyexec(pmd_t entry)
+{
+	return entry;
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pte_t *ptep,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb640..04d7a0a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -755,6 +755,32 @@  static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
+#ifdef CONFIG_ARCH_SUPPORTS_LAZY_EXEC
+static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
+{
+	if (unlikely(vma->vm_flags & VM_EXEC))
+		return pte_mkexec(entry);
+	return entry;
+}
+
+static inline pmd_t maybe_pmd_mkexec(pmd_t entry, struct vm_area_struct *vma)
+{
+	if (unlikely(vma->vm_flags & VM_EXEC))
+		return pmd_mkexec(entry);
+	return entry;
+}
+#else
+static inline pte_t maybe_mkexec(pte_t entry, struct vm_area_struct *vma)
+{
+	return entry;
+}
+
+static inline pmd_t maybe_pmd_mkexec(pmd_t entry, struct vm_area_struct *vma)
+{
+	return entry;
+}
+#endif
+
 vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
 vm_fault_t finish_fault(struct vm_fault *vmf);
diff --git a/mm/Kconfig b/mm/Kconfig
index 25c71eb..5c046cb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -322,6 +322,15 @@  config DEFAULT_MMAP_MIN_ADDR
 	  This value can be changed after boot using the
 	  /proc/sys/vm/mmap_min_addr tunable.
 
+config ARCH_SUPPORTS_LAZY_EXEC
+	bool "Architecture supports deferred exec permission setting"
+	help
+	  Some architectures can improve performance during non-fault page
+	  table modification paths with deferred exec permission setting
+	  which helps in avoiding expensive I-cache invalidations. This
+	  requires arch implementation of ptep_set_access_flags() to allow
+	  non-exec to exec transition.
+
 config ARCH_SUPPORTS_MEMORY_FAILURE
 	bool
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index faf357e..9ef7662 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1126,6 +1126,8 @@  void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
 	if (write)
 		entry = pmd_mkdirty(entry);
 	haddr = vmf->address & HPAGE_PMD_MASK;
+	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
+		entry = maybe_pmd_mkexec(entry, vmf->vma);
 	if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry, write))
 		update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
 
@@ -1290,6 +1292,8 @@  vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		if (vmf->flags & FAULT_FLAG_INSTRUCTION)
+			entry = maybe_pmd_mkexec(entry, vma);
 		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry,  1))
 			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		ret |= VM_FAULT_WRITE;
@@ -2944,6 +2948,7 @@  void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_write_migration_entry(entry))
 		pmde = maybe_pmd_mkwrite(pmde, vma);
+	pmde = pmd_mklazyexec(pmde);
 
 	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
 	if (PageAnon(new))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index afef616..ea41832 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4018,6 +4018,8 @@  vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = huge_pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
+	if (flags & FAULT_FLAG_INSTRUCTION)
+		entry = maybe_mkexec(entry, vma);
 	if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, haddr, ptep);
diff --git a/mm/memory.c b/mm/memory.c
index e11ca9d..74c406b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2218,6 +2218,8 @@  static inline void wp_page_reuse(struct vm_fault *vmf)
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 	entry = pte_mkyoung(vmf->orig_pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
+		entry = maybe_mkexec(entry, vma);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3804,6 +3806,8 @@  static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
+	if (vmf->flags & FAULT_FLAG_INSTRUCTION)
+		entry = maybe_mkexec(entry, vmf->vma);
 	if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
 				vmf->flags & FAULT_FLAG_WRITE)) {
 		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
diff --git a/mm/migrate.c b/mm/migrate.c
index d4fd680..7587717 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -257,6 +257,7 @@  static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (PageHuge(new)) {
 			pte = pte_mkhuge(pte);
 			pte = arch_make_huge_pte(pte, vma, new, 0);
+			pte = pte_mklazyexec(pte);
 			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 			if (PageAnon(new))
 				hugepage_add_anon_rmap(new, vma, pvmw.address);
@@ -265,6 +266,7 @@  static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		} else
 #endif
 		{
+			pte = pte_mklazyexec(pte);
 			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 
 			if (PageAnon(new))