From patchwork Mon Apr 19 16:26:54 2010
X-Patchwork-Submitter: John David Anglin
X-Patchwork-Id: 93520
Date: Mon, 19 Apr 2010 12:26:54 -0400
From: John David Anglin
To: Helge Deller
Cc: linux-parisc@vger.kernel.org, gniibe@fsij.org, carlos@systemhalted.org,
 dave.anglin@nrc-cnrc.gc.ca
Subject: Re: threads and fork on machine with VIPT-WB cache
Message-ID: <20100419162653.GA106@hiauly1.hia.nrc.ca>
In-Reply-To: <20100413115501.307040@gmx.net>
References: <20100412214118.46D925160@hiauly1.hia.nrc.ca>
 <20100413115501.307040@gmx.net>

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 7a73b61..ab87176 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -2,6 +2,7 @@
 #define _PARISC_CACHEFLUSH_H
 
 #include <linux/mm.h>
+#include <linux/uaccess.h>
 
 /* The usual comment is "Caches aren't brain-dead on the <architecture>".
  * Unfortunately, that doesn't apply to PA-RISC. */
@@ -113,11 +114,20 @@ static inline void *kmap(struct page *page)
 
 #define kunmap(page)			kunmap_parisc(page_address(page))
 
-#define kmap_atomic(page, idx)		page_address(page)
+static inline void *kmap_atomic(struct page *page, enum km_type idx)
+{
+	pagefault_disable();
+	return page_address(page);
+}
 
-#define kunmap_atomic(addr, idx)	kunmap_parisc(addr)
+static inline void kunmap_atomic(void *addr, enum km_type idx)
+{
+	kunmap_parisc(addr);
+	pagefault_enable();
+}
 
-#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
+#define kmap_atomic_prot(page, idx, prot)	kmap_atomic(page, idx)
+#define kmap_atomic_pfn(pfn, idx)	kmap_atomic(pfn_to_page(pfn), (idx))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
 
 #endif
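A note on the kmap_atomic() change: PA-RISC has no highmem, so the mapping
itself is trivial, but generic callers assume kmap_atomic() enters a
non-preemptible, non-faulting window, which the old bare page_address()
macro didn't provide.  A minimal sketch of the calling convention the new
inlines satisfy (the copy helper is illustrative only, not part of the
patch; KM_USER0/1 come from asm/kmap_types.h):

/* Illustrative only: copy one page under an atomic kmap.  With the
 * patch applied, scheduling or faulting inside this window is
 * detected instead of silently tolerated. */
static void copy_page_atomic(struct page *dst, struct page *src)
{
	void *vdst = kmap_atomic(dst, KM_USER0);
	void *vsrc = kmap_atomic(src, KM_USER1);

	copy_page(vdst, vsrc);

	kunmap_atomic(vsrc, KM_USER1);
	kunmap_atomic(vdst, KM_USER0);
}

The unmap side still goes through kunmap_parisc(), which on the processors
that need it flushes the kernel alias of the page; that is the VIPT-WB
wrinkle that keeps these from being pure no-ops.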
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index a27d2e2..6a221af 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -38,7 +38,8 @@
 	do{							\
 		*(pteptr) = (pteval);				\
 	} while(0)
 
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+#define set_pte_at(mm,addr,ptep,pteval) \
+	do { set_pte(ptep,pteval); purge_tlb_page(mm, addr); } while(0)
 
 #endif /* !__ASSEMBLY__ */
 
@@ -410,6 +411,8 @@ extern void paging_init (void);
 
 #define PG_dcache_dirty         PG_arch_1
 
+extern void flush_cache_page(struct vm_area_struct *, unsigned long, unsigned long);
+extern void purge_tlb_page(struct mm_struct *, unsigned long);
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
 /* Encode and de-code a swap entry */
@@ -423,22 +426,39 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+extern spinlock_t pa_dbit_lock;
+
+static inline void pte_update_lock(void)
 {
 #ifdef CONFIG_SMP
-	if (!pte_young(*ptep))
-		return 0;
-	return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), &pte_val(*ptep));
-#else
-	pte_t pte = *ptep;
-	if (!pte_young(pte))
-		return 0;
-	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
-	return 1;
+	preempt_disable();
+	spin_lock(&pa_dbit_lock);
+#endif
+}
+
+static inline void pte_update_unlock(void)
+{
+#ifdef CONFIG_SMP
+	spin_unlock(&pa_dbit_lock);
+	preempt_enable();
 #endif
 }
 
-extern spinlock_t pa_dbit_lock;
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte;
+
+	pte_update_lock();
+	pte = *ptep;
+	if (!pte_young(pte)) {
+		pte_update_unlock();
+		return 0;
+	}
+	set_pte(ptep, pte_mkold(pte));
+	pte_update_unlock();
+	purge_tlb_page(vma->vm_mm, addr);
+
+	return 1;
+}
 
 struct mm_struct;
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -446,29 +466,29 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr
 	pte_t old_pte;
 	pte_t pte;
 
-	spin_lock(&pa_dbit_lock);
+	pte_update_lock();
 	pte = old_pte = *ptep;
 	pte_val(pte) &= ~_PAGE_PRESENT;
 	pte_val(pte) |= _PAGE_FLUSH;
-	set_pte_at(mm,addr,ptep,pte);
-	spin_unlock(&pa_dbit_lock);
+	set_pte(ptep,pte);
+	pte_update_unlock();
+	purge_tlb_page(mm, addr);
 
 	return old_pte;
 }
 
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-#ifdef CONFIG_SMP
-	unsigned long new, old;
+	pte_t old_pte;
 
-	do {
-		old = pte_val(*ptep);
-		new = pte_val(pte_wrprotect(__pte (old)));
-	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
-#else
-	pte_t old_pte = *ptep;
-	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
-#endif
+	pte_update_lock();
+	old_pte = *ptep;
+	set_pte(ptep, pte_wrprotect(old_pte));
+	pte_update_unlock();
+
+	if (pte_present(old_pte) && pte_dirty(old_pte))
+		flush_cache_page(vma, addr, pte_pfn(*ptep));
+	purge_tlb_page(mm, addr);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
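All of the pgtable.h helpers now follow one pattern: take pa_dbit_lock via
pte_update_lock() so the TLB-miss handlers cannot set the accessed/dirty
bits mid-update, store the new value with set_pte(), drop the lock, then
purge the stale translation.  The purge deliberately comes after the store;
doing it first would leave a window where a TLB refill could re-fetch the
old PTE.  A hypothetical caller shows the strengthened set_pte_at()
contract (illustrative only, not from the patch):

/* Hypothetical example: once set_pte_at() returns, the new PTE is
 * visible and the old translation has been purged, so the caller no
 * longer needs its own flush_tlb_page(). */
static void remap_one_page(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, unsigned long pfn, pgprot_t prot)
{
	set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
}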
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index b6ed34d..cd64e38 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -577,3 +577,17 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 
 	__flush_cache_page(vma, vmaddr);
 }
+
+void purge_tlb_page(struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long flags;
+
+	/* For one page, it's not worth testing the split_tlb variable */
+
+	mb();
+	mtsp(mm->context,1);
+	purge_tlb_start(flags);
+	pdtlb(addr);
+	pitlb(addr);
+	purge_tlb_end(flags);
+}
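purge_tlb_page() duplicates the body of flush_tlb_page() from
asm/tlbflush.h, taking the mm directly so the pgtable.h inlines don't need
a vma; both pdtlb and pitlb are issued because, as the comment says,
testing split_tlb isn't worth it for a single page.  If this approach is
acceptable, flush_tlb_page() could presumably be reduced to a trivial
wrapper (a follow-up sketch, not part of the posted patch):

/* Possible follow-up consolidation in asm/tlbflush.h: */
static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long addr)
{
	purge_tlb_page(vma->vm_mm, addr);
}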
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 3a44f7f..12ebb8a 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -490,19 +464,57 @@
 
 	/* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
 	 * don't needlessly dirty the cache line if it was already set */
-	.macro		update_ptep	ptep,pte,tmp,tmp1
+	.macro		update_ptep	ptep,pte,spc,tmp,tmp1
+#ifdef CONFIG_SMP
+	bb,<,n		\pte,_PAGE_ACCESSED_BIT,3f
+	cmpib,COND(=),n	0,\spc,2f
+	load32		PA(pa_dbit_lock),\tmp
+1:
+	LDCW		0(\tmp),\tmp1
+	cmpib,COND(=)	0,\tmp1,1b
+	nop
+	LDREG		0(\ptep),\pte
+2:
+	ldi		_PAGE_ACCESSED,\tmp1
+	or		\tmp1,\pte,\pte
+	STREG		\pte,0(\ptep)
+
+	cmpib,COND(=),n	0,\spc,3f
+	ldi		1,\tmp1
+	stw		\tmp1,0(\tmp)
+3:
+#else
 	ldi		_PAGE_ACCESSED,\tmp1
 	or		\tmp1,\pte,\tmp
 	and,COND(<>)	\tmp1,\pte,%r0
 	STREG		\tmp,0(\ptep)
+#endif
 	.endm
 
 	/* Set the dirty bit (and accessed bit).  No need to be
 	 * clever, this is only used from the dirty fault */
-	.macro		update_dirty	ptep,pte,tmp
-	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp
-	or		\tmp,\pte,\pte
+	.macro		update_dirty	ptep,pte,spc,tmp,tmp1
+#ifdef CONFIG_SMP
+	cmpib,COND(=),n	0,\spc,2f
+	load32		PA(pa_dbit_lock),\tmp
+1:
+	LDCW		0(\tmp),\tmp1
+	cmpib,COND(=)	0,\tmp1,1b
+	nop
+	LDREG		0(\ptep),\pte
+2:
+#endif
+
+	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp1
+	or		\tmp1,\pte,\pte
 	STREG		\pte,0(\ptep)
+
+#ifdef CONFIG_SMP
+	cmpib,COND(=),n	0,\spc,3f
+	ldi		1,\tmp1
+	stw		\tmp1,0(\tmp)
+3:
+#endif
 	.endm
 
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1214,7 +1224,7 @@ dtlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,dtlb_check_alias_20w
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1238,7 +1248,7 @@ nadtlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,nadtlb_check_flush_20w
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1272,7 +1282,7 @@ dtlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_11
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1321,7 +1331,7 @@ nadtlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_flush_11
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1368,7 +1378,7 @@ dtlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_20
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1394,7 +1404,7 @@ nadtlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_flush_20
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1508,7 +1518,7 @@ itlb_miss_20w:
 
 	L3_ptep		ptp,pte,t0,va,itlb_fault
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1526,7 +1536,7 @@ itlb_miss_11:
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1548,7 +1558,7 @@ itlb_miss_20:
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	update_ptep	ptp,pte,t0,t1
+	update_ptep	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1570,29 +1580,11 @@ dbit_trap_20w:
 
 	L3_ptep		ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nolock_20w
-	load32		PA(pa_dbit_lock),t0
-
-dbit_spin_20w:
-	LDCW		0(t0),t1
-	cmpib,COND(=)	0,t1,dbit_spin_20w
-	nop
-
-dbit_nolock_20w:
-#endif
-	update_dirty	ptp,pte,t1
+	update_dirty	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	idtlbt		pte,prot
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nounlock_20w
-	ldi		1,t1
-	stw		t1,0(t0)
-
-dbit_nounlock_20w:
-#endif
 
 	rfir
 	nop
@@ -1606,18 +1598,7 @@ dbit_trap_11:
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nolock_11
-	load32		PA(pa_dbit_lock),t0
-
-dbit_spin_11:
-	LDCW		0(t0),t1
-	cmpib,=		0,t1,dbit_spin_11
-	nop
-
-dbit_nolock_11:
-#endif
-	update_dirty	ptp,pte,t1
+	update_dirty	ptp,pte,spc,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1628,13 +1609,6 @@ dbit_nolock_11:
 	idtlbp		prot,(%sr1,va)
 
 	mtsp		t1, %sr1	/* Restore sr1 */
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nounlock_11
-	ldi		1,t1
-	stw		t1,0(t0)
-
-dbit_nounlock_11:
-#endif
 
 	rfir
 	nop
@@ -1646,18 +1620,7 @@ dbit_trap_20:
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nolock_20
-	load32		PA(pa_dbit_lock),t0
-
-dbit_spin_20:
-	LDCW		0(t0),t1
-	cmpib,=		0,t1,dbit_spin_20
-	nop
-
-dbit_nolock_20:
-#endif
-	update_dirty	ptp,pte,t1
+	update_dirty	ptp,pte,spc,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
@@ -1665,14 +1628,6 @@ dbit_nolock_20:
 
 	idtlbt		pte,prot
 
-#ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,spc,dbit_nounlock_20
-	ldi		1,t1
-	stw		t1,0(t0)
-
-dbit_nounlock_20:
-#endif
-
 	rfir
 	nop
 #endif
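The entry.S rework folds the open-coded dbit_spin_*/dbit_nounlock_*
sequences into update_ptep and update_dirty themselves, so every software
TLB walk now uses the same pa_dbit_lock protocol as the C helpers.  For
readers more fluent in C than PA-RISC assembly, the SMP path of
update_ptep behaves roughly as follows (an illustrative rendering only;
'lock' stands in for the LDCW-aligned word inside pa_dbit_lock):

/* Rough C equivalent of update_ptep's SMP path.  spc is the space of
 * the faulting access: zero means a kernel fault, which skips the
 * lock, exactly as the cmpib tests against \spc do. */
static void update_ptep_sketch(pte_t *ptep, unsigned long spc,
			       volatile unsigned int *lock)
{
	pte_t pte = *ptep;

	if (pte_val(pte) & _PAGE_ACCESSED)
		return;				/* bb,<,n ...,3f: bit already set */

	if (spc) {
		while (__ldcw(lock) == 0)	/* LDCW spin loop */
			;
		pte = *ptep;			/* LDREG: re-read under the lock */
	}

	pte_val(pte) |= _PAGE_ACCESSED;
	*ptep = pte;				/* STREG */

	if (spc)
		*lock = 1;			/* stw 1,0(\tmp): release */
}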
diff --git a/mm/memory.c b/mm/memory.c
index 09e4b1b..21c2916 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -616,7 +616,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
-		ptep_set_wrprotect(src_mm, addr, src_pte);
+		ptep_set_wrprotect(vma, src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
 
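One caveat on this last hunk: ptep_set_wrprotect() is an arch interface,
so adding the vma parameter affects every architecture, not just parisc;
copy_one_pte() here is the only generic caller.  The vma is needed because
flush_cache_page() wants the user virtual address when the cache is
virtually indexed.  Other architectures would need a matching one-line
change along these lines (a sketch of what the asm-generic-style fallback
would presumably become, not part of the posted patch):

/* The vma would be unused on architectures that don't have to flush
 * a virtually-indexed cache before the COW copy is made. */
static inline void ptep_set_wrprotect(struct vm_area_struct *vma,
		struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pte_t old_pte = *ptep;

	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
}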