parisc: Re: Best parisc kernel?

Message ID ef615a38-24ce-78f6-523b-85dec0668536@bell.net (mailing list archive)
State Superseded
Series parisc: Re: Best parisc kernel?

Commit Message

John David Anglin Dec. 9, 2021, 8:46 p.m. UTC
On 2021-12-02 3:33 p.m., Helge Deller wrote:
> On 12/2/21 18:47, John David Anglin wrote:
>> On 2021-12-02 12:15 p.m., John David Anglin wrote:
>>> On 2021-12-01 7:32 p.m., John David Anglin wrote:
>>>> On 2021-12-01 4:05 p.m., Helge Deller wrote:
>>>>> On 12/1/21 20:53, John David Anglin wrote:
>>>>>> On 2021-11-26 2:05 p.m., John David Anglin wrote:
>>>>>>> diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
>>>>>>> index f124c301b7a..e8cc81511aa 100644
>>>>>>> --- a/gcc/config/pa/pa.md
>>>>>>> +++ b/gcc/config/pa/pa.md
>>>>>>> @@ -10366,10 +10366,11 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
>>>>>>>    {
>>>>>>>      if (TARGET_SYNC_LIBCALL)
>>>>>>>        {
>>>>>>> -      rtx mem = operands[0];
>>>>>>> -      rtx val = operands[1];
>>>>>>> -      if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val))
>>>>>>> -       DONE;
>>>>>>> +      rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
>>>>>>> +      emit_library_call (libfunc, LCT_NORMAL, VOIDmode,
>>>>>>> +                        XEXP (operands[0], 0), Pmode,
>>>>>>> +                        operands[1], QImode);
>>>>>>> +      DONE;
>>>>>>>        }
>>>>>>>      FAIL;
>>>>>>>    })
>>>>>>>
>>>>>>> However, doing this causes soft lockups in the glibc testsuite:
>>>>>>>
>>>>>>> Message from syslogd@atlas at Nov 25 23:03:01 ...
>>>>>>>    kernel:watchdog: BUG: soft lockup - CPU#0 stuck for 354s! [ld.so.1:22095]
>>>>>>>
>>>>>>> Message from syslogd@atlas at Nov 25 23:03:01 ...
>>>>>>>    kernel:watchdog: BUG: soft lockup - CPU#1 stuck for 361s! [ld.so.1:22093]
>>>>>>>
>>>>>>> Message from syslogd@atlas at Nov 25 23:08:30 ...
>>>>>>>
>>>>>>>    kernel:watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ld.so.1:16025]
>>>>>>>
>>>>>>> Message from syslogd@atlas at Nov 25 23:10:28 ...
>>>>>>>    kernel:watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [ld.so.1:22086]
>>>>>>>
>>>>>>> Message from syslogd@atlas at Nov 25 23:10:30 ...
>>>>>>>    kernel:watchdog: BUG: soft lockup - CPU#0 stuck for 21s! [ld.so.1:16025]
>>>>>>>
>>>>>>> This happens both with and without lws_atomic_xchg.  The lockups aren't permanent, but they clearly
>>>>>>> impact performance.  Maybe we need to call sched_yield() if we spin too many times?  I think scheduling
>>>>>>> is blocked when we spend too much time on the gateway page.
>>>>>> The above soft lockups are not caused by the above change to pa.md.
>>>>>>
>>>>>> They all occur on the gateway page in thread-related tests. They are not real lockups, but I would guess scheduling
>>>>>> is not optimal when we spend a lot of time on the gateway page.
>>>>> Or maybe calling cond_resched() from inside the kernel (in the locking functions):
>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2a8bc5316adc998951e8f726c31e231a6021eae2
>>>> I think the problem is related to COW breaks and the lock hashing, which is shared between multiple processes/threads.
>>>> This can leave an ldcw lock in the held state for an extended period. There's probably a hole in the logic that prevents
>>>> processes from being scheduled off while on the gateway page.
>>>>
>>>> The problem was probably aggravated by the patch to leave interrupts enabled when we try to take lock.
>>>>
>>>> A COW break can occur on the store instruction in the CAS operation.
>>> I wonder if we should deprecate the LWS implementation and use a full syscall?  See sys_atomic_cmpxchg_32()
>>> in arch/m68k/kernel/sys_m68k.c for the m68k implementation.  I believe arm has one too.
> interesting.
>
>> The big concern about the current implementation is whether or not an IRQ or page fault can cause another
>> thread/process to be scheduled in the middle of the critical sequences.  So far, I haven't seen this but it would take
>> a lot of testing to be sure.
> True.
>
>> Can a process be killed if it sleeps in a critical region?
> Don't know.
The attached patch against v5.14.21 fixes the LWS CAS behavior.  COW breaks no longer occur in the critical
region.  The COW break now occurs on the stbys,e instruction.  It magically does a store without writing anything.

Patch

diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 2d019aa73b8f..7011f9ba9678 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -83,7 +83,7 @@  endif
 # Currently we save and restore fpregs on all kernel entry/interruption paths.
 # If that gets optimized, we might need to disable the use of fpregs in the
 # kernel.
-cflags-y	+= -mdisable-fpregs
+# cflags-y	+= -mdisable-fpregs
 
 # Use long jumps instead of long branches (needed if your linker fails to
 # link a too big vmlinux executable). Not enabled for building modules.
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index dff453687530..81f64a90857d 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -18,7 +18,8 @@  KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 KBUILD_CFLAGS += -fno-strict-aliasing
 KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -fno-builtin-printf
-KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
+# KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -Os
 ifndef CONFIG_64BIT
 KBUILD_CFLAGS += -mfast-indirect-calls
 endif
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index a39250cb7dfc..13ed4b809b05 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -135,6 +135,16 @@ 
 	extrd,u \r, 63-(\sa), 64-(\sa), \t
 	.endm
 
+	/* The extru instruction leaves the most significant 32 bits of the
+	 * target register in an undefined state on PA 2.0 systems. */
+	.macro extru_safe r, p, len, t
+#ifdef CONFIG_64BIT
+	extrd,u	\r, 32+(\p), \len, \t
+#else
+	extru	\r, \p, \len, \t
+#endif
+	.endm
+
 	/* load 32-bit 'value' into 'reg' compensating for the ldil
 	 * sign-extension when running in wide mode.
 	 * WARNING!! neither 'value' nor 'reg' can be expressions
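
As a cross-check, the field extraction both variants are expected to perform
can be modeled in C. PA bit numbering counts bit 0 as the MSB, so a field of
'len' bits ending at position 'p' starts at shift 31 - p; rebasing 'p' by 32
makes extrd,u compute the same shift (63 - (32 + p) == 31 - p) while leaving
the upper half of the result defined. The model function is mine, not kernel
code, and assumes len < BITS_PER_LONG:

/* C model of extru r,p,len,t (32-bit semantics). */
static unsigned long extru_model(unsigned long r, unsigned int p,
                                 unsigned int len)
{
        return (r >> (31 - p)) & ((1UL << len) - 1);
}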
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index fceb9cf02fb3..e0c7f8d61365 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -10,23 +10,28 @@ 
 #include <asm/errno.h>
 
 /* The following has to match the LWS code in syscall.S.  We have
-   sixteen four-word locks. */
+   256 four-word locks. We xor three 8-bit hunks of the futex virtual
+   address to create the hash index. */
+
+static unsigned long
+_futex_hash_index(unsigned long pa)
+{
+	return ((pa >> 6) ^ (pa >> 14) ^ (pa >> 22)) & 0x3fc;
+}
 
 static inline void
-_futex_spin_lock_irqsave(u32 __user *uaddr, unsigned long int *flags)
+_futex_spin_lock_irqsave(unsigned long index, unsigned long int *flags)
 {
 	extern u32 lws_lock_start[];
-	long index = ((long)uaddr & 0x3f8) >> 1;
 	arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
 	local_irq_save(*flags);
 	arch_spin_lock(s);
 }
 
 static inline void
-_futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
+_futex_spin_unlock_irqrestore(unsigned long index, unsigned long int *flags)
 {
 	extern u32 lws_lock_start[];
-	long index = ((long)uaddr & 0x3f8) >> 1;
 	arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
 	arch_spin_unlock(s);
 	local_irq_restore(*flags);
@@ -35,11 +40,14 @@  _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
 static inline int
 arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
+	unsigned long ua = (unsigned long)uaddr;
+	unsigned long index;
 	unsigned long int flags;
 	int oldval, ret;
 	u32 tmp;
 
-	_futex_spin_lock_irqsave(uaddr, &flags);
+	index = _futex_hash_index(ua);
+	_futex_spin_lock_irqsave(index, &flags);
 
 	ret = -EFAULT;
 	if (unlikely(get_user(oldval, uaddr) != 0))
@@ -72,7 +80,7 @@  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 		ret = -EFAULT;
 
 out_pagefault_enable:
-	_futex_spin_unlock_irqrestore(uaddr, &flags);
+	_futex_spin_unlock_irqrestore(index, &flags);
 
 	if (!ret)
 		*oval = oldval;
@@ -84,10 +92,12 @@  static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 			      u32 oldval, u32 newval)
 {
+	unsigned long ua = (unsigned long)uaddr;
+	unsigned long index;
 	u32 val;
 	unsigned long flags;
 
 	/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
 	 * our gateway page, and causes no end of trouble...
 	 */
 	if (uaccess_kernel() && !uaddr)
@@ -98,23 +108,25 @@  futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 
 	/* HPPA has no cmpxchg in hardware and therefore the
 	 * best we can do here is use an array of locks. The
-	 * lock selected is based on a hash of the userspace
-	 * address. This should scale to a couple of CPUs.
+	 * lock selected is based on a hash of the virtual
+	 * address of the futex. This should scale to a couple
+	 * of CPUs.
 	 */
 
-	_futex_spin_lock_irqsave(uaddr, &flags);
+	index = _futex_hash_index(ua);
+	_futex_spin_lock_irqsave(index, &flags);
 	if (unlikely(get_user(val, uaddr) != 0)) {
-		_futex_spin_unlock_irqrestore(uaddr, &flags);
+		_futex_spin_unlock_irqrestore(index, &flags);
 		return -EFAULT;
 	}
 
 	if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
-		_futex_spin_unlock_irqrestore(uaddr, &flags);
+		_futex_spin_unlock_irqrestore(index, &flags);
 		return -EFAULT;
 	}
 
 	*uval = val;
-	_futex_spin_unlock_irqrestore(uaddr, &flags);
+	_futex_spin_unlock_irqrestore(index, &flags);
 
 	return 0;
 }
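
The hash can be sanity-checked in isolation; the sketch below mirrors
_futex_hash_index() above and uses made-up user addresses. Futexes within
the same 256-byte block map to the same lock:

#include <stdio.h>

/* XOR three 8-bit hunks of the futex address; the 0x3fc mask keeps the
 * index scaled to one of 256 four-word (16-byte) lock slots. */
static unsigned long futex_hash_index(unsigned long va)
{
        return ((va >> 6) ^ (va >> 14) ^ (va >> 22)) & 0x3fc;
}

int main(void)
{
        printf("%#lx\n", futex_hash_index(0x4000a100));  /* 0x384 */
        printf("%#lx\n", futex_hash_index(0x4000a1f8));  /* 0x384, same block */
        printf("%#lx\n", futex_hash_index(0x4000a200));  /* 0x388, next block */
        return 0;
}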
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 1f2fea3bfacd..7c6a3fcd51bd 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -65,6 +65,8 @@  extern int pa_serialize_tlb_flushes;
  * are slow on SMP machines since the purge must be broadcast to all CPUs.
  */
 
+extern void __update_cache(pte_t pte);
+
 static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long flags;
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index a303ae9a77f4..0493cbf70012 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -2,26 +2,30 @@ 
 #ifndef __PARISC_SPECIAL_INSNS_H
 #define __PARISC_SPECIAL_INSNS_H
 
-#define lpa(va)	({			\
-	unsigned long pa;		\
-	__asm__ __volatile__(		\
-		"copy %%r0,%0\n\t"	\
-		"lpa %%r0(%1),%0"	\
-		: "=r" (pa)		\
-		: "r" (va)		\
-		: "memory"		\
-	);				\
-	pa;				\
+#define lpa(va)	({					\
+	unsigned long pa;				\
+	__asm__ __volatile__(				\
+		"copy %%r0,%0\n"			\
+		"9998:\tlpa %%r0(%1),%0\n"		\
+		"9999:\n"				\
+		ASM_EXCEPTIONTABLE_ENTRY(9998b, 9999b)	\
+		: "=&r" (pa)				\
+		: "r" (va)				\
+		: "memory"				\
+	);						\
+	pa;						\
 })
 
-#define lpa_user(va)	({		\
-	unsigned long pa;		\
-	__asm__ __volatile__(		\
-		"copy %%r0,%0\n\t"	\
-		"lpa %%r0(%%sr3,%1),%0"	\
-		: "=r" (pa)		\
-		: "r" (va)		\
-		: "memory"		\
+#define lpa_user(va)	({				\
+	unsigned long pa;				\
+	__asm__ __volatile__(				\
+		"copy %%r0,%0\n"			\
+		"9998:\tlpa %%r0(%%sr3,%1),%0\n"	\
+		"9999:\n"				\
+		ASM_EXCEPTIONTABLE_ENTRY(9998b, 9999b)	\
+		: "=&r" (pa)				\
+		: "r" (va)				\
+		: "memory"				\
 	);				\
 	pa;				\
 })
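
Because both macros clear the result register before the lpa, the new fixup
entries make a faulting lpa fall through to the 9999 label with pa still
zero, so callers can treat zero as "no translation". A hedged usage sketch
(the helper is hypothetical, not part of the patch):

static int user_va_to_pa(unsigned long va, unsigned long *pa)
{
        unsigned long tmp = lpa_user(va);       /* 0 if the lpa trapped */

        if (!tmp)
                return -EFAULT;         /* no valid mapping for va */
        *pa = tmp;
        return 0;
}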
diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh
index 056d588befdd..70d3cffb0251 100644
--- a/arch/parisc/install.sh
+++ b/arch/parisc/install.sh
@@ -39,6 +39,7 @@  verify "$3"
 if [ -n "${INSTALLKERNEL}" ]; then
   if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
   if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+  if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi
 fi
 
 # Default install
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index c81ab0cb8925..c9f09d2a4461 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -559,6 +559,7 @@  void flush_cache_mm(struct mm_struct *mm)
 		return;
 	}
 
+	preempt_disable();
 	if (mm->context == mfsp(3)) {
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			flush_user_dcache_range_asm(vma->vm_start, vma->vm_end);
@@ -566,8 +567,10 @@  void flush_cache_mm(struct mm_struct *mm)
 				flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
 			flush_tlb_range(vma, vma->vm_start, vma->vm_end);
 		}
+		preempt_enable();
 		return;
 	}
+	preempt_enable();
 
 	pgd = mm->pgd;
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -606,13 +609,16 @@  void flush_cache_range(struct vm_area_struct *vma,
 		return;
 	}
 
+	preempt_disable();
 	if (vma->vm_mm->context == mfsp(3)) {
 		flush_user_dcache_range_asm(start, end);
 		if (vma->vm_flags & VM_EXEC)
 			flush_user_icache_range_asm(start, end);
 		flush_tlb_range(vma, start, end);
+		preempt_enable();
 		return;
 	}
+	preempt_enable();
 
 	pgd = vma->vm_mm->pgd;
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 437c8d31f390..7f33e3236a1e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -393,17 +393,9 @@ 
 	 */
 	.macro		L2_ptep	pmd,pte,index,va,fault
 #if CONFIG_PGTABLE_LEVELS == 3
-	extru		\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
+	extru_safe	\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
 #else
-# if defined(CONFIG_64BIT)
-	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  #else
-  # if PAGE_SIZE > 4096
-	extru		\va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
-  # else
-	extru		\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  # endif
-# endif
+	extru_safe	\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 #endif
 	dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 #if CONFIG_PGTABLE_LEVELS < 3
@@ -413,7 +405,7 @@ 
 	bb,>=,n		\pmd,_PxD_PRESENT_BIT,\fault
 	dep		%r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
 	SHLREG		\pmd,PxD_VALUE_SHIFT,\pmd
-	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
+	extru_safe	\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
 	.endm
@@ -974,8 +966,8 @@  intr_do_preempt:
 
 	/* current_thread_info()->preempt_count */
 	mfctl	%cr30, %r1
-	LDREG	TI_PRE_COUNT(%r1), %r19
-	cmpib,COND(<>)	0, %r19, intr_restore	/* if preempt_count > 0 */
+	ldw	TI_PRE_COUNT(%r1), %r19
+	cmpib,<>	0, %r19, intr_restore	/* if preempt_count > 0 */
 	nop				/* prev insn branched backwards */
 
 	/* check if we interrupted a critical path */
diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
index 80a0ab372802..799795bc4210 100644
--- a/arch/parisc/kernel/patch.c
+++ b/arch/parisc/kernel/patch.c
@@ -67,8 +67,8 @@  void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
 	int mapped;
 
 	/* Make sure we don't have any aliases in cache */
-	flush_kernel_vmap_range(addr, len);
 	flush_icache_range(start, end);
+	flush_tlb_kernel_range(start, end);
 
 	p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
 
@@ -93,7 +93,6 @@  void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
 	flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
 	if (mapped)
 		patch_unmap(FIX_TEXT_POKE0, &flags);
-	flush_icache_range(start, end);
 }
 
 void __kprobes __patch_text(void *addr, u32 insn)
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index cf92ece20b75..0cd97fa004c5 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -228,11 +228,13 @@  static inline void
 send_IPI_allbutself(enum ipi_message_type op)
 {
 	int i;
-	
+
+	preempt_disable();
 	for_each_online_cpu(i) {
 		if (i != smp_processor_id())
 			send_IPI_single(i, op);
 	}
+	preempt_enable();
 }
 
 #ifdef CONFIG_KGDB
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 3f24a0af1e04..2f468292a545 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -74,7 +74,7 @@  ENTRY(linux_gateway_page)
 	/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
 	/* Light-weight-syscall entry must always be located at 0xb0 */
 	/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (3)
+#define __NR_lws_entries (5)
 
 lws_entry:
 	gate	lws_start, %r0		/* increase privilege */
@@ -496,6 +496,11 @@  lws_start:
 	/* Jump to lws, lws table pointers already relocated */
 	be,n	0(%sr2,%r21)
 
+lws_stby_fault:
+	ldo	1(%r0),%r28
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+
 lws_exit_nosys:
 	ldo	-ENOSYS(%r0),%r21		   /* set errno */
 	/* Fall through: Return to userspace */
@@ -567,15 +572,25 @@  lws_compare_and_swap32:
 #endif
 
 lws_compare_and_swap:
-	/* Load start of lock table */
-	ldil	L%lws_lock_start, %r20
-	ldo	R%lws_lock_start(%r20), %r28
+	/* Dirty cache line at r26 to avoid COW break in critical region */
+	b	3f
+1:	stbys,e	%r0, 0(%r26)
+2:	b,n	lws_stby_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 2b-linux_gateway_page)
+
+	/* Calculate 8-bit hash index from virtual address */
+3:	extru_safe  %r26, 23, 8, %r28
+	extru_safe  %r26, 15, 8, %r1
+	xor	%r1, %r28, %r28
+	extru_safe  %r26, 7, 8, %r1
+	xor	%r1, %r28, %r20
 
-	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
 
-	/* Find lock to use, the hash is either one of 0 to
-	   15, multiplied by 16 (keep it 16-byte aligned)
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
 	   and add to the lock table offset. */
 	shlw	%r20, 4, %r20
 	add	%r20, %r28, %r20
@@ -603,10 +618,20 @@  cas_nocontend:
 # endif
 /* ENABLE_LWS_DEBUG */
 
+	/* We disable interrupts around the critical region. However,
+	   the store instruction in the critical region might cause a
+	   COW break. The trap handler needs to re-enable interrupts
+	   when page faults are enabled as the process might sleep.
+	   When this happens, the lock may be held by a process for
+	   an extended period and affect unrelated processes because
+	   locks are shared. */
 	rsm	PSW_SM_I, %r0				/* Disable interrupts */
-	/* COW breaks can cause contention on UP systems */
+
+	/* Check lock first to minimize cache line bounce */
+	ldw	0(%sr2,%r20), %r28			/* Load the lock */
+	cmpb,=,n	%r0, %r28, cas_wouldblock	/* Spin if locked */
 	LDCW	0(%sr2,%r20), %r28			/* Try to acquire the lock */
-	cmpb,<>,n	%r0, %r28, cas_action		/* Did we get it? */
+	cmpb,<>,n	%r28, %r0,  cas_action		/* Did we get it? */
 cas_wouldblock:
 	ldo	2(%r0), %r28				/* 2nd case */
 	ssm	PSW_SM_I, %r0
@@ -621,7 +646,7 @@  cas_wouldblock:
 	*/
 
 	/* NOTES:
-		This all works becuse intr_do_signal
+		This all works because intr_do_signal
 		and schedule both check the return iasq
 		and see that we are on the kernel page
 		so this process is never scheduled off
@@ -629,6 +654,7 @@  cas_wouldblock:
 		thus it is wholly atomic from usrspaces
 		perspective
 	*/
+
 cas_action:
 #if defined CONFIG_SMP && ENABLE_LWS_DEBUG
 	/* DEBUG */
@@ -757,23 +783,49 @@  lws_compare_and_swap_2:
 #endif
 
 cas2_lock_start:
-	/* Load start of lock table */
-	ldil	L%lws_lock_start, %r20
-	ldo	R%lws_lock_start(%r20), %r28
+	/* Dirty cache line at r26 to avoid COW break in critical region */
+	copy	%r26, %r28
+#ifdef CONFIG_64BIT
+	depdi	0, 63, 2, %r28
+#else
+	depi	0, 31, 2, %r28
+#endif
+	b	3f
+1:	stbys,e	%r0, 0(%r28)
+2:	b,n	lws_stby_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 2b-linux_gateway_page)
+
+	/* Calculate 8-bit hash index from virtual address */
+3:	extru_safe  %r26, 23, 8, %r28
+	extru_safe  %r26, 15, 8, %r1
+	xor	%r1, %r28, %r28
+	extru_safe  %r26, 7, 8, %r1
+	xor	%r1, %r28, %r20
 
-	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
 
-	/* Find lock to use, the hash is either one of 0 to
-	   15, multiplied by 16 (keep it 16-byte aligned)
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
 	   and add to the lock table offset. */
 	shlw	%r20, 4, %r20
 	add	%r20, %r28, %r20
 
+	/* We disable interrupts around the critical region. However,
+	   the store instruction in the critical region might cause a
+	   COW break. The trap handler needs to re-enable interrupts
+	   when page faults are enabled as the process might sleep.
+	   When this happens, the lock may be held by a process for
+	   an extended period and affect unrelated processes because
+	   locks are shared. */
 	rsm	PSW_SM_I, %r0			/* Disable interrupts */
-	/* COW breaks can cause contention on UP systems */
+
+	/* Check lock first to minimize cache line bounce */
+	ldw	0(%sr2,%r20), %r28			/* Load the lock */
+	cmpb,=,n	%r0, %r28, cas2_wouldblock	/* Spin if locked */
 	LDCW	0(%sr2,%r20), %r28		/* Try to acquire the lock */
-	cmpb,<>,n	%r0, %r28, cas2_action	/* Did we get it? */
+	cmpb,<>,n	%r28, %r0,  cas2_action	/* Did we get it? */
 cas2_wouldblock:
 	ldo	2(%r0), %r28			/* 2nd case */
 	ssm	PSW_SM_I, %r0
@@ -788,7 +840,7 @@  cas2_wouldblock:
 	*/
 
 	/* NOTES:
-		This all works becuse intr_do_signal
+		This all works because intr_do_signal
 		and schedule both check the return iasq
 		and see that we are on the kernel page
 		so this process is never scheduled off
@@ -796,6 +848,7 @@  cas2_wouldblock:
 		thus it is wholly atomic from usrspaces
 		perspective
 	*/
+
 cas2_action:
 	/* Jump to the correct function */
 	blr	%r29, %r0
@@ -897,6 +950,340 @@  cas2_end:
 	ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 22b-linux_gateway_page)
 #endif
 
+	/***************************************************
+		LWS atomic exchange.  When using a synthesized CAS
+		operation, we need to perform atomic loads using the CAS
+		lock to ensure sequential consistency.
+
+		%r26 - Exchange address
+		%r25 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+		%r24 - Address of new value
+		%r23 - Address of old value
+		%r28 - Return non-zero on failure
+		%r21 - Kernel error code
+
+		%r21 has the following meanings:
+
+		EAGAIN - Lock is busy, ldcw failed, try again.
+		EFAULT - Load or store failed.
+
+		Scratch: r20, r1
+
+	****************************************************/
+
+	/* ELF32 Process entry path */
+lws_atomic_xchg:
+#ifdef CONFIG_64BIT
+	/* Clip the input registers to 32 bits, since this is an
+	   ELF32 entry point. */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+	depdi	0, 31, 32, %r23
+#endif
+
+	/* Check the validity of the size index */
+	subi,>>= 3, %r25, %r0
+	b,n	lws_exit_nosys
+
+	/* Dirty cache line at r26 to avoid COW break in critical region */
+	copy	%r26, %r28
+#ifdef CONFIG_64BIT
+	depdi	0, 63, 2, %r28
+#else
+	depi	0, 31, 2, %r28
+#endif
+	b	3f
+1:	stbys,e	%r0, 0(%r28)
+2:	b,n	lws_stby_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 2b-linux_gateway_page)
+
+	/* Calculate 8-bit hash index from virtual address */
+3:	extru_safe  %r26, 23, 8, %r28
+	extru_safe  %r26, 15, 8, %r1
+	xor	%r1, %r28, %r28
+	extru_safe  %r26, 7, 8, %r1
+	xor	%r1, %r28, %r20
+
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
+
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	shlw	%r25, 2, %r1
+
+	/* We disable interrupts around the critical region. However,
+	   the store instruction in the critical region might cause a
+	   COW break. The trap handler needs to re-enable interrupts
+	   when page faults are enabled as the process might sleep.
+	   When this happens, the lock may be held by a process for
+	   an extended period and affect unrelated processes because
+	   locks are shared. */
+	rsm	PSW_SM_I, %r0			/* Disable interrupts */
+
+	/* Check lock first to minimize cache line bounce */
+	ldw	0(%sr2,%r20), %r28		/* Load the lock */
+	cmpb,=,n	%r0, %r28, 1f		/* Spin if locked */
+	LDCW	0(%sr2,%r20), %r28		/* Try to acquire the lock */
+	cmpb,<>,n	%r28, %r0, atomic_xchg_action	/* Did we get it? */
+1:	ldo	2(%r0), %r28			/* 2nd case */
+	ssm	PSW_SM_I, %r0
+	b	lws_exit			/* Contended... */
+	ldo	-EAGAIN(%r0), %r21		/* Spin in userspace */
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		or is ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+
+atomic_xchg_action:
+	/* Jump to the correct function */
+	blr	%r1, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8bit exchange */
+1:	ldb	0(%r26), %r1
+2:	stb	%r1, 0(%r23)
+3:	ldb	0(%r24), %r1
+4:	stb	%r1, 0(%r26)
+	b	atomic_xchg_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 16bit exchange */
+5:	ldh	0(%r26), %r1
+6:	sth	%r1, 0(%r23)
+7:	ldh	0(%r24), %r1
+8:	sth	%r1, 0(%r26)
+	b	atomic_xchg_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 32bit exchange */
+9:	ldw	0(%r26), %r1
+10:	stw	%r1, 0(%r23)
+11:	ldw	0(%r24), %r1
+12:	stw	%r1, 0(%r26)
+	b	atomic_xchg_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 64bit exchange */
+#ifdef CONFIG_64BIT
+13:	ldd	0(%r26), %r1
+14:	std	%r1, 0(%r23)
+15:	ldd	0(%r24), %r1
+16:	std	%r1, 0(%r26)
+#else
+13:	flddx	0(%r26), %fr4
+14:	fstdx	%fr4, 0(%r23)
+15:	flddx	0(%r24), %fr4
+16:	fstdx	%fr4, 0(%r26)
+#endif
+	copy	%r0, %r28
+
+atomic_xchg_end:
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	/* Return to userspace, set no error */
+	b	lws_exit
+	copy	%r0, %r21
+
+17:
+	/* Error occurred on load or store */
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	ldo	1(%r0),%r28
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+
+	/* Exception table entries, for the load and store, return EFAULT.
+	   Each of the entries must be relocated. */
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(3b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 17b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 17b-linux_gateway_page)
+
+	/***************************************************
+		LWS atomic store.  When using a synthesized CAS
+		operation, we need to perform atomic stores using the
+		CAS lock to ensure sequential consistency.
+
+		%r26 - Store address
+		%r25 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+		%r24 - Address of value to store
+		%r28 - Return non-zero on failure
+		%r21 - Kernel error code
+
+		%r21 has the following meanings:
+
+		EAGAIN - Lock is busy, ldcw failed, try again.
+		EFAULT - Load or store failed.
+
+		Scratch: r20, r1
+
+	****************************************************/
+
+	/* ELF32 Process entry path */
+lws_atomic_store:
+#ifdef CONFIG_64BIT
+	/* Clip the input registers to 32 bits, since this is an
+	   ELF32 entry point. */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+#endif
+
+	/* Check the validity of the size index */
+	subi,>>= 3, %r25, %r0
+	b,n	lws_exit_nosys
+
+	/* Dirty cache line at r26 to avoid COW break in critical region */
+	copy	%r26, %r28
+#ifdef CONFIG_64BIT
+	depdi	0, 63, 2, %r28
+#else
+	depi	0, 31, 2, %r28
+#endif
+	b	3f
+1:	stbys,e	%r0, 0(%r28)
+2:	b,n	lws_stby_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 2b-linux_gateway_page)
+
+	/* Calculate 8-bit hash index from virtual address */
+3:	extru_safe  %r26, 23, 8, %r28
+	extru_safe  %r26, 15, 8, %r1
+	xor	%r1, %r28, %r28
+	extru_safe  %r26, 7, 8, %r1
+	xor	%r1, %r28, %r20
+
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
+
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	shlw	%r25, 1, %r1
+
+	/* We disable interrupts around the critical region. However,
+	   the store instruction in the critical region might cause a
+	   COW break. The trap handler needs to re-enable interrupts
+	   when page faults are enabled as the process might sleep.
+	   When this happens, the lock may be held by a process for
+	   an extended period and affect unrelated processes because
+	   locks are shared. */
+	rsm	PSW_SM_I, %r0			/* Disable interrupts */
+
+	/* Check lock first to minimize cache line bounce */
+	ldw	0(%sr2,%r20), %r28		/* Load the lock */
+	cmpb,=,n	%r0, %r28, 1f		/* Spin if locked */
+	LDCW	0(%sr2,%r20), %r28		/* Try to acquire the lock */
+	cmpb,<>,n	%r0, %r28, atomic_store_action	/* Did we get it? */
+1:	ldo	2(%r0), %r28			/* 2nd case */
+	ssm	PSW_SM_I, %r0
+	b	lws_exit			/* Contended... */
+	ldo	-EAGAIN(%r0), %r21		/* Spin in userspace */
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		or is ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+
+atomic_store_action:
+	/* Jump to the correct function */
+	blr	%r1, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8bit store */
+1:	ldb	0(%r24), %r1
+2:	stb	%r1, 0(%r26)
+	b	atomic_store_end
+	copy	%r0, %r28
+
+	/* 16bit store */
+3:	ldh	0(%r24), %r1
+4:	sth	%r1, 0(%r26)
+	b	atomic_store_end
+	copy	%r0, %r28
+
+	/* 32bit store */
+5:	ldw	0(%r24), %r1
+6:	stw	%r1, 0(%r26)
+	b	atomic_store_end
+	copy	%r0, %r28
+
+	/* 64bit store */
+#ifdef CONFIG_64BIT
+7:	ldd	0(%r24), %r1
+8:	std	%r1, 0(%r26)
+#else
+7:	flddx	0(%r24), %fr4
+8:	fstdx	%fr4, 0(%r26)
+#endif
+	copy	%r0, %r28
+
+atomic_store_end:
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	/* Return to userspace, set no error */
+	b	lws_exit
+	copy	%r0, %r21
+
+9:
+	/* Error occurred on store */
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	ldo	1(%r0),%r28
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+
+	/* Exception table entries, return EFAULT.
+	   Each of the entries must be relocated. */
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(3b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 9b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 9b-linux_gateway_page)
+
 	/* Make sure nothing else is placed on this page */
 	.align PAGE_SIZE
 END(linux_gateway_page)
@@ -916,6 +1305,8 @@  ENTRY(lws_table)
 	LWS_ENTRY(compare_and_swap32)		/* 0 - ELF32 Atomic 32bit CAS */
 	LWS_ENTRY(compare_and_swap64)		/* 1 - ELF64 Atomic 32bit CAS */
 	LWS_ENTRY(compare_and_swap_2)		/* 2 - ELF32 Atomic 64bit CAS */
+	LWS_ENTRY(atomic_xchg)			/* 3 - ELF32 Atomic Exchange */
+	LWS_ENTRY(atomic_store)			/* 4 - ELF32 Atomic Store */
 END(lws_table)
 	/* End of lws table */
 
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 08e4d480abe1..08860a015811 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -249,27 +249,16 @@  void __init time_init(void)
 static int __init init_cr16_clocksource(void)
 {
 	/*
-	 * The cr16 interval timers are not syncronized across CPUs on
-	 * different sockets, so mark them unstable and lower rating on
-	 * multi-socket SMP systems.
+	 * The cr16 interval timers are not synchronized across CPUs, even if
+	 * they share the same socket.
 	 */
 	if (num_online_cpus() > 1 && !running_on_qemu) {
-		int cpu;
-		unsigned long cpu0_loc;
-		cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
-
-		for_each_online_cpu(cpu) {
-			if (cpu == 0)
-				continue;
-			if ((cpu0_loc != 0) &&
-			    (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
-				continue;
-
-			clocksource_cr16.name = "cr16_unstable";
-			clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
-			clocksource_cr16.rating = 0;
-			break;
-		}
+		/* mark sched_clock unstable */
+		clear_sched_clock_stable();
+
+		clocksource_cr16.name = "cr16_unstable";
+		clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+		clocksource_cr16.rating = 0;
 	}
 
 	/* XXX: We may want to mark sched_clock stable here if cr16 clocks are
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 8d8441d4562a..725dbf4554dd 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -479,9 +479,9 @@  void notrace handle_interruption(int code, struct pt_regs *regs)
 	int si_code;
 
 	if (code == 1)
-	    pdc_console_restart();  /* switch back to pdc if HPMC */
-	else
-	    local_irq_enable();
+		pdc_console_restart();  /* switch back to pdc if HPMC */
+	else if (!irqs_disabled_flags(regs->gr[0]))
+		local_irq_enable();
 
 	/* Security check:
 	 * If the priority level is still user, and the
@@ -729,6 +729,8 @@  void notrace handle_interruption(int code, struct pt_regs *regs)
 			}
 			mmap_read_unlock(current->mm);
 		}
+		/* CPU could not fetch instruction, so clear stale IIR value. */
+		regs->iir = 0xbaadf00d;
 		fallthrough;
 	case 27: 
 		/* Data memory protection ID trap */
@@ -782,13 +784,13 @@  void notrace handle_interruption(int code, struct pt_regs *regs)
 	     * unless pagefault_disable() was called before.
 	     */
 
-	    if (fault_space == 0 && !faulthandler_disabled())
+	    if (faulthandler_disabled() || fault_space == 0)
 	    {
 		/* Clean up and return if in exception table. */
 		if (fixup_exception(regs))
 			return;
 		pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
-		parisc_terminate("Kernel Fault", regs, code, fault_address);
+		parisc_terminate("Kernel Fault", regs, code, 0);
 	    }
 	}
 
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 86a57fb0e6fa..34676658c040 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -309,7 +309,8 @@  static void unwind_frame_regs(struct unwind_frame_info *info)
 				break;
 			}
 
-			if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET))) 
+			if (copy_from_kernel_nofault(&tmp,
+			    (void *)info->prev_sp - RP_OFFSET, sizeof(tmp)))
 				break;
 			info->prev_ip = tmp;
 			sp = info->prev_sp;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 3d208afd15bc..2769eb991f58 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -57,8 +57,6 @@  SECTIONS
 {
 	. = KERNEL_BINARY_TEXT_START;
 
-	_stext = .;	/* start of kernel text, includes init code & data */
-
 	__init_begin = .;
 	HEAD_TEXT_SECTION
 	MLONGCALL_DISCARD(INIT_TEXT_SECTION(8))
@@ -82,6 +80,7 @@  SECTIONS
 	/* freed after init ends here */
 
 	_text = .;		/* Text and read-only data */
+	_stext = .;
 	MLONGCALL_KEEP(INIT_TEXT_SECTION(8))
 	.text ALIGN(PAGE_SIZE) : {
 		TEXT_TEXT
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index bf33f4b0de40..c4721acb367b 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -341,7 +341,7 @@  static void __init setup_bootmem(void)
 
 static bool kernel_set_to_readonly;
 
-static void __init map_pages(unsigned long start_vaddr,
+static void map_pages(unsigned long start_vaddr,
 			     unsigned long start_paddr, unsigned long size,
 			     pgprot_t pgprot, int force)
 {
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index ed3c4c42fc23..d68d05d5d383 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -281,7 +281,7 @@  agp_ioc_init(void __iomem *ioc_regs)
         return 0;
 }
 
-static int
+static int __init
 lba_find_capability(int cap)
 {
 	struct _parisc_agp_info *info = &parisc_agp_info;
@@ -366,7 +366,7 @@  parisc_agp_setup(void __iomem *ioc_hpa, void __iomem *lba_hpa)
 	return error;
 }
 
-static int
+static int __init
 find_quicksilver(struct device *dev, void *data)
 {
 	struct parisc_device **lba = data;
@@ -378,7 +378,7 @@  find_quicksilver(struct device *dev, void *data)
 	return 0;
 }
 
-static int
+static int __init
 parisc_agp_init(void)
 {
 	extern struct sba_device *sba_list;