[1/3] mm: set page fault address for update_mmu_cache_pmd

Message ID 1592990792-1923-1-git-send-email-maobibo@loongson.cn (mailing list archive)
State Rejected
Series [1/3] mm: set page fault address for update_mmu_cache_pmd

Commit Message

Bibo Mao June 24, 2020, 9:26 a.m. UTC
update_mmu_cache_pmd is used to update the TLB for a pmd entry in
software. On MIPS systems, a TLB entry indexed by the page fault
address may already exist, and that entry may cover either a normal
page or a huge page. Before the pmd entry is updated with huge page
size, the older TLB entry needs to be invalidated.

Here the page fault address is passed to update_mmu_cache_pmd rather
than the start address of the pmd huge page, because the fault address
is what indexes the older TLB entry to be invalidated.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/mips/include/asm/pgtable.h | 9 +++++++++
 mm/huge_memory.c                | 7 ++++---
 mm/memory.c                     | 2 +-
 3 files changed, 14 insertions(+), 4 deletions(-)
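
To make the addressing concrete, here is a minimal editorial sketch
(not part of the patch; the function name is made up). Within one PMD
region the fault address and the huge page start address differ by the
in-region offset, and a stale normal-page TLB entry is indexed by the
fault address, which is why the patch hands vmf->address down:

	/*
	 * Sketch only: contrasts the two candidate addresses, following
	 * the addr & PMD_MASK computation used by insert_pfn_pmd().
	 */
	static void addressing_sketch(struct vm_fault *vmf)
	{
		unsigned long haddr = vmf->address & PMD_MASK;

		/* before: update_mmu_cache_pmd(vmf->vma, haddr, vmf->pmd); */
		/* after:  update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd); */
	}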

Comments

Andrew Morton June 24, 2020, 7:49 p.m. UTC | #1
On Wed, 24 Jun 2020 17:26:30 +0800 Bibo Mao <maobibo@loongson.cn> wrote:

> update_mmu_cache_pmd is used to update the TLB for a pmd entry in
> software. On MIPS systems, a TLB entry indexed by the page fault
> address may already exist, and that entry may cover either a normal
> page or a huge page. Before the pmd entry is updated with huge page
> size, the older TLB entry needs to be invalidated.
> 
> Here the page fault address is passed to update_mmu_cache_pmd rather
> than the start address of the pmd huge page, because the fault address
> is what indexes the older TLB entry to be invalidated.
> 
> ...
>
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb	update_mmu_cache
>  
> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
> +				unsigned long page);

This is unfortunate.  We can't #include <asm/tlbflush.h>?
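
For reference, the include-based alternative Andrew is pointing at
would be the single line below; this is a sketch and assumes that on
MIPS asm/tlbflush.h declares local_flush_tlb_page() and can be pulled
into pgtable.h without creating a circular include:

	#include <asm/tlbflush.h>	/* would replace the open-coded extern */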

>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  	unsigned long address, pmd_t *pmdp)
>  {
>  	pte_t pte = *(pte_t *)pmdp;
>  
> +	/*
> +	 * If pmd_none is true, the older TLB entry covers a normal page.
> +	 * Invalidate the older entry indexed by address: the parameter
> +	 * address must be the page fault address rather than the start
> +	 * address of the pmd huge page.
> +	 */
> +	local_flush_tlb_page(vma, address);
>  	__update_tlb(vma, address, pte);
>  }
>
Kirill A. Shutemov June 30, 2020, 10:09 a.m. UTC | #2
On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
> update_mmu_cache_pmd is used to update the TLB for a pmd entry in
> software. On MIPS systems, a TLB entry indexed by the page fault
> address may already exist, and that entry may cover either a normal
> page or a huge page. Before the pmd entry is updated with huge page
> size, the older TLB entry needs to be invalidated.
> 
> Here the page fault address is passed to update_mmu_cache_pmd rather
> than the start address of the pmd huge page, because the fault address
> is what indexes the older TLB entry to be invalidated.
> 
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>  mm/huge_memory.c                | 7 ++++---
>  mm/memory.c                     | 2 +-
>  3 files changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index dd7a0f5..bd81661 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>  #define update_mmu_tlb	update_mmu_cache
>  
> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
> +				unsigned long page);
>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>  	unsigned long address, pmd_t *pmdp)
>  {
>  	pte_t pte = *(pte_t *)pmdp;
>  
> +	/*
> +	 * If pmd_none is true, the older TLB entry covers a normal page.
> +	 * Invalidate the older entry indexed by address: the parameter
> +	 * address must be the page fault address rather than the start
> +	 * address of the pmd huge page.
> +	 */
> +	local_flush_tlb_page(vma, address);

Can't say I follow what is going on.

Why local? What happens on SMP?

And don't you want to flush PMD_SIZE range around the address?

>  	__update_tlb(vma, address, pte);
>  }
Bibo Mao June 30, 2020, 10:42 a.m. UTC | #3
On 06/30/2020 06:09 PM, Kirill A. Shutemov wrote:
> On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
>> update_mmu_cache_pmd is used to update the TLB for a pmd entry in
>> software. On MIPS systems, a TLB entry indexed by the page fault
>> address may already exist, and that entry may cover either a normal
>> page or a huge page. Before the pmd entry is updated with huge page
>> size, the older TLB entry needs to be invalidated.
>>
>> Here the page fault address is passed to update_mmu_cache_pmd rather
>> than the start address of the pmd huge page, because the fault address
>> is what indexes the older TLB entry to be invalidated.
>>
>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>> ---
>>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>>  mm/huge_memory.c                | 7 ++++---
>>  mm/memory.c                     | 2 +-
>>  3 files changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
>> index dd7a0f5..bd81661 100644
>> --- a/arch/mips/include/asm/pgtable.h
>> +++ b/arch/mips/include/asm/pgtable.h
>> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>>  #define update_mmu_tlb	update_mmu_cache
>>  
>> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
>> +				unsigned long page);
>>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>>  	unsigned long address, pmd_t *pmdp)
>>  {
>>  	pte_t pte = *(pte_t *)pmdp;
>>  
>> +	/*
>> +	 * If pmd_none is true, the older TLB entry covers a normal page.
>> +	 * Invalidate the older entry indexed by address: the parameter
>> +	 * address must be the page fault address rather than the start
>> +	 * address of the pmd huge page.
>> +	 */
>> +	local_flush_tlb_page(vma, address);
> 
> Can't say I follow what is going on.
> 
> Why local? What happens on SMP?
> 
> And don't you want to flush PMD_SIZE range around the address?
There are two cases:
1. The address is accessed for the first time: there is one TLB entry with normal page
   size whose privilege is none. If a new TLB entry with huge page size is to be added,
   the older TLB entry needs to be removed first. Local flushing is enough: if other
   SMP threads are running, they will take page faults themselves, since the privilege
   level is none, and during page fault handling each of them does the same work,
   flushing its local entry and installing the new huge page entry.

2. The address is not being accessed for the first time: an older TLB entry with a huge
   page already exists, as in the COW scenario. local_flush_tlb_page is not necessary
   here; the old huge page TLB entry will be replaced with the new one in __update_tlb.

As for flushing the PMD_SIZE range around the address: within that range there is
either one TLB entry with huge page size, or one TLB entry with normal page size and
zero privilege. It is impossible for two or more normal page TLB entries to exist
within one PMD_SIZE range, so we do not need to flush the whole pmd range; flushing
one TLB entry is enough.
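
As an editorial aid, here is the patched helper again with the two
cases above annotated in place (the comments are ours, not from the
patch):

	static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
	{
		pte_t pte = *(pte_t *)pmdp;

		/*
		 * Case 1: first access.  A none-privilege normal-page TLB
		 * entry indexed by the fault address may exist; drop the
		 * local copy.  Other CPUs holding the same stale entry
		 * fault on it and perform the same local flush themselves.
		 */
		local_flush_tlb_page(vma, address);

		/*
		 * Case 2: a huge-page entry already exists (e.g. after
		 * COW).  The flush above is then unnecessary but harmless;
		 * __update_tlb() replaces the entry either way.
		 */
		__update_tlb(vma, address, pte);
	}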

regards
bibo,mao

Bibo Mao July 1, 2020, 2:54 a.m. UTC | #4
On 06/30/2020 06:42 PM, maobibo wrote:
> 
> 
> On 06/30/2020 06:09 PM, Kirill A. Shutemov wrote:
>> On Wed, Jun 24, 2020 at 05:26:30PM +0800, Bibo Mao wrote:
>>> update_mmu_cache_pmd is used to update the TLB for a pmd entry in
>>> software. On MIPS systems, a TLB entry indexed by the page fault
>>> address may already exist, and that entry may cover either a normal
>>> page or a huge page. Before the pmd entry is updated with huge page
>>> size, the older TLB entry needs to be invalidated.
>>>
>>> Here the page fault address is passed to update_mmu_cache_pmd rather
>>> than the start address of the pmd huge page, because the fault address
>>> is what indexes the older TLB entry to be invalidated.
>>>
>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>> ---
>>>  arch/mips/include/asm/pgtable.h | 9 +++++++++
>>>  mm/huge_memory.c                | 7 ++++---
>>>  mm/memory.c                     | 2 +-
>>>  3 files changed, 14 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
>>> index dd7a0f5..bd81661 100644
>>> --- a/arch/mips/include/asm/pgtable.h
>>> +++ b/arch/mips/include/asm/pgtable.h
>>> @@ -554,11 +554,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>>>  #define	__HAVE_ARCH_UPDATE_MMU_TLB
>>>  #define update_mmu_tlb	update_mmu_cache
>>>  
>>> +extern void local_flush_tlb_page(struct vm_area_struct *vma,
>>> +				unsigned long page);
>>>  static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
>>>  	unsigned long address, pmd_t *pmdp)
>>>  {
>>>  	pte_t pte = *(pte_t *)pmdp;
>>>  
>>> +	/*
>>> +	 * If pmd_none is true, the older TLB entry covers a normal page.
>>> +	 * Invalidate the older entry indexed by address: the parameter
>>> +	 * address must be the page fault address rather than the start
>>> +	 * address of the pmd huge page.
>>> +	 */
>>> +	local_flush_tlb_page(vma, address);
>>
>> Can't say I follow what is going on.
>>
>> Why local? What happens on SMP?
>>
>> And don't you want to flush PMD_SIZE range around the address?
> There are two cases:
> 1. The address is accessed for the first time: there is one TLB entry with normal page
>    size whose privilege is none. If a new TLB entry with huge page size is to be added,
>    the older TLB entry needs to be removed first. Local flushing is enough: if other
>    SMP threads are running, they will take page faults themselves, since the privilege
>    level is none, and during page fault handling each of them does the same work,
>    flushing its local entry and installing the new huge page entry.
> 
> 2. The address is not being accessed for the first time: an older TLB entry with a huge
>    page already exists, as in the COW scenario. local_flush_tlb_page is not necessary
>    here; the old huge page TLB entry will be replaced with the new one in __update_tlb.
> 
> As for flushing the PMD_SIZE range around the address: within that range there is
> either one TLB entry with huge page size, or one TLB entry with normal page size and
> zero privilege. It is impossible for two or more normal page TLB entries to exist
> within one PMD_SIZE range, so we do not need to flush the whole pmd range; flushing
> one TLB entry is enough.
Sorry for the noise; please discard the patch.

Actually, two or more TLB entries with normal pages can exist within a
PMD_SIZE range. If multiple threads run on UP or on a single CPU and
access the same huge page but different normal pages, a page fault can
happen on thread1, and thread1 can be scheduled out during page fault
handling. Thread2 is then scheduled in and the page fault happens
again, leaving two TLB entries with normal pages. This problem exists
even without the patch.
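
Given that retraction, a single-page flush cannot cover every stale
normal-page entry, which points back to the PMD_SIZE-range flush Kirill
asked about. A hedged sketch of that alternative, using the existing
flush_tlb_range() interface (the helper name is made up, and whether
this is the right fix was not settled in this thread):

	/*
	 * Sketch only: flush every possible stale normal-page entry in
	 * the PMD range, not just the one indexed by the fault address.
	 */
	static inline void flush_stale_pmd_range(struct vm_area_struct *vma,
						 unsigned long address)
	{
		unsigned long start = address & PMD_MASK;

		flush_tlb_range(vma, start, start + PMD_SIZE);
	}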



Patch

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index dd7a0f5..bd81661 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -554,11 +554,20 @@  static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define	__HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb	update_mmu_cache
 
+extern void local_flush_tlb_page(struct vm_area_struct *vma,
+				unsigned long page);
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 	unsigned long address, pmd_t *pmdp)
 {
 	pte_t pte = *(pte_t *)pmdp;
 
+	/*
+	 * If pmd_none is true, the older TLB entry covers a normal page.
+	 * Invalidate the older entry indexed by address: the parameter
+	 * address must be the page fault address rather than the start
+	 * address of the pmd huge page.
+	 */
+	local_flush_tlb_page(vma, address);
 	__update_tlb(vma, address, pte);
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78c84be..0f9187b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -780,6 +780,7 @@  static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pgtable_t pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	unsigned long start = addr & PMD_MASK;
 	pmd_t entry;
 	spinlock_t *ptl;
 
@@ -792,7 +793,7 @@  static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			}
 			entry = pmd_mkyoung(*pmd);
 			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+			if (pmdp_set_access_flags(vma, start, pmd, entry, 1))
 				update_mmu_cache_pmd(vma, addr, pmd);
 		}
 
@@ -813,7 +814,7 @@  static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pgtable = NULL;
 	}
 
-	set_pmd_at(mm, addr, pmd, entry);
+	set_pmd_at(mm, start, pmd, entry);
 	update_mmu_cache_pmd(vma, addr, pmd);
 
 out_unlock:
@@ -864,7 +865,7 @@  vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
 
 	track_pfn_insert(vma, &pgprot, pfn);
 
-	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+	insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, pgprot, write, pgtable);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
diff --git a/mm/memory.c b/mm/memory.c
index dc7f354..c703458 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3592,7 +3592,7 @@  static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 
-	update_mmu_cache_pmd(vma, haddr, vmf->pmd);
+	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 
 	/* fault is handled */
 	ret = 0;