diff mbox series

[RFC,V3,8/9] x86/fault: Report the PKRS state on fault

Message ID 20201009194258.3207172-9-ira.weiny@intel.com (mailing list archive)
State New
Headers show
Series PKS: Add Protection Keys Supervisor (PKS) support RFC v3 | expand

Commit Message

Ira Weiny Oct. 9, 2020, 7:42 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

When only user space pkeys are enabled faulting within the kernel was an
unexpected condition which should never happen, therefore a WARN_ON was
added to the kernel fault handler to detect if it ever did.  Now that
PKS can be enabled this is no longer the case.

Report a Pkey fault with a normal splat and add the PKRS state to the
fault splat text.  Note the PKS register is reset during an exception
therefore the saved PKRS value from before the beginning of the
exception is passed down.

If PKS is not enabled, or not active, maintain the WARN_ON_ONCE() from
before.

Because each fault has its own state the pkrs information will be
correctly reported even if a fault 'faults'.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 arch/x86/mm/fault.c | 59 ++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

Comments

Dave Hansen Oct. 13, 2020, 6:56 p.m. UTC | #1
> @@ -548,6 +549,11 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
>  		 (error_code & X86_PF_PK)    ? "protection keys violation" :
>  					       "permissions violation");
>  
> +#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
> +	if (irq_state && (error_code & X86_PF_PK))
> +		pr_alert("PKRS: 0x%x\n", irq_state->pkrs);
> +#endif

This means everyone will see 'PKRS: 0x0', even if they're on non-PKS
hardware.  I think I'd rather have this only show PKRS when we're on
cpu_feature_enabled(PKS) hardware.

...
> @@ -1148,14 +1156,15 @@ static int fault_in_kernel_space(unsigned long address)
>   */
>  static void
>  do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
> -		   unsigned long address)
> +		   unsigned long address, irqentry_state_t *irq_state)
>  {
>  	/*
> -	 * Protection keys exceptions only happen on user pages.  We
> -	 * have no user pages in the kernel portion of the address
> -	 * space, so do not expect them here.
> +	 * If protection keys are not enabled for kernel space
> +	 * do not expect Pkey errors here.
>  	 */

Let's fix the double-negative:

	/*
	 * PF_PK is only expected on kernel addresses whenn
	 * supervisor pkeys are enabled:
	 */

> -	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
> +	if (!IS_ENABLED(CONFIG_ARCH_HAS_SUPERVISOR_PKEYS) ||
> +	    !cpu_feature_enabled(X86_FEATURE_PKS))
> +		WARN_ON_ONCE(hw_error_code & X86_PF_PK);

Yeah, please stick X86_FEATURE_PKS in disabled-features so you can use
cpu_feature_enabled(X86_FEATURE_PKS) by itself here..
Ira Weiny Oct. 15, 2020, 4:13 a.m. UTC | #2
On Tue, Oct 13, 2020 at 11:56:53AM -0700, Dave Hansen wrote:
> > @@ -548,6 +549,11 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
> >  		 (error_code & X86_PF_PK)    ? "protection keys violation" :
> >  					       "permissions violation");
> >  
> > +#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
> > +	if (irq_state && (error_code & X86_PF_PK))
> > +		pr_alert("PKRS: 0x%x\n", irq_state->pkrs);
> > +#endif
> 
> This means everyone will see 'PKRS: 0x0', even if they're on non-PKS
> hardware.  I think I'd rather have this only show PKRS when we're on
> cpu_feature_enabled(PKS) hardware.

Good catch, thanks.

> 
> ...
> > @@ -1148,14 +1156,15 @@ static int fault_in_kernel_space(unsigned long address)
> >   */
> >  static void
> >  do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
> > -		   unsigned long address)
> > +		   unsigned long address, irqentry_state_t *irq_state)
> >  {
> >  	/*
> > -	 * Protection keys exceptions only happen on user pages.  We
> > -	 * have no user pages in the kernel portion of the address
> > -	 * space, so do not expect them here.
> > +	 * If protection keys are not enabled for kernel space
> > +	 * do not expect Pkey errors here.
> >  	 */
> 
> Let's fix the double-negative:
> 
> 	/*
> 	 * PF_PK is only expected on kernel addresses whenn
> 	 * supervisor pkeys are enabled:
> 	 */

done. thanks.

> 
> > -	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
> > +	if (!IS_ENABLED(CONFIG_ARCH_HAS_SUPERVISOR_PKEYS) ||
> > +	    !cpu_feature_enabled(X86_FEATURE_PKS))
> > +		WARN_ON_ONCE(hw_error_code & X86_PF_PK);
> 
> Yeah, please stick X86_FEATURE_PKS in disabled-features so you can use
> cpu_feature_enabled(X86_FEATURE_PKS) by itself here..

done.

thanks,
Ira
diff mbox series

Patch

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e55bc4bff389..ee761c993f58 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -504,7 +504,8 @@  static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
 }
 
 static void
-show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+		irqentry_state_t *irq_state)
 {
 	if (!oops_may_print())
 		return;
@@ -548,6 +549,11 @@  show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
 		 (error_code & X86_PF_PK)    ? "protection keys violation" :
 					       "permissions violation");
 
+#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+	if (irq_state && (error_code & X86_PF_PK))
+		pr_alert("PKRS: 0x%x\n", irq_state->pkrs);
+#endif
+
 	if (!(error_code & X86_PF_USER) && user_mode(regs)) {
 		struct desc_ptr idt, gdt;
 		u16 ldtr, tr;
@@ -626,7 +632,8 @@  static void set_signal_archinfo(unsigned long address,
 
 static noinline void
 no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, int signal, int si_code)
+	   unsigned long address, int signal, int si_code,
+	   irqentry_state_t *irq_state)
 {
 	struct task_struct *tsk = current;
 	unsigned long flags;
@@ -732,7 +739,7 @@  no_context(struct pt_regs *regs, unsigned long error_code,
 	 */
 	flags = oops_begin();
 
-	show_fault_oops(regs, error_code, address);
+	show_fault_oops(regs, error_code, address, irq_state);
 
 	if (task_stack_end_corrupted(tsk))
 		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
@@ -785,7 +792,8 @@  static bool is_vsyscall_vaddr(unsigned long vaddr)
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		       unsigned long address, u32 pkey, int si_code)
+		       unsigned long address, u32 pkey, int si_code,
+		       irqentry_state_t *state)
 {
 	struct task_struct *tsk = current;
 
@@ -832,14 +840,14 @@  __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	if (is_f00f_bug(regs, address))
 		return;
 
-	no_context(regs, error_code, address, SIGSEGV, si_code);
+	no_context(regs, error_code, address, SIGSEGV, si_code, state);
 }
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-		     unsigned long address)
+		     unsigned long address, irqentry_state_t *state)
 {
-	__bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR);
+	__bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR, state);
 }
 
 static void
@@ -853,7 +861,7 @@  __bad_area(struct pt_regs *regs, unsigned long error_code,
 	 */
 	mmap_read_unlock(mm);
 
-	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
+	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code, NULL);
 }
 
 static noinline void
@@ -923,7 +931,7 @@  do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 {
 	/* Kernel mode? Handle exceptions or die: */
 	if (!(error_code & X86_PF_USER)) {
-		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
+		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR, NULL);
 		return;
 	}
 
@@ -957,7 +965,7 @@  mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, vm_fault_t fault)
 {
 	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
-		no_context(regs, error_code, address, 0, 0);
+		no_context(regs, error_code, address, 0, 0, NULL);
 		return;
 	}
 
@@ -965,7 +973,7 @@  mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & X86_PF_USER)) {
 			no_context(regs, error_code, address,
-				   SIGSEGV, SEGV_MAPERR);
+				   SIGSEGV, SEGV_MAPERR, NULL);
 			return;
 		}
 
@@ -980,7 +988,7 @@  mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 			     VM_FAULT_HWPOISON_LARGE))
 			do_sigbus(regs, error_code, address, fault);
 		else if (fault & VM_FAULT_SIGSEGV)
-			bad_area_nosemaphore(regs, error_code, address);
+			bad_area_nosemaphore(regs, error_code, address, NULL);
 		else
 			BUG();
 	}
@@ -1148,14 +1156,15 @@  static int fault_in_kernel_space(unsigned long address)
  */
 static void
 do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
-		   unsigned long address)
+		   unsigned long address, irqentry_state_t *irq_state)
 {
 	/*
-	 * Protection keys exceptions only happen on user pages.  We
-	 * have no user pages in the kernel portion of the address
-	 * space, so do not expect them here.
+	 * If protection keys are not enabled for kernel space
+	 * do not expect Pkey errors here.
 	 */
-	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_SUPERVISOR_PKEYS) ||
+	    !cpu_feature_enabled(X86_FEATURE_PKS))
+		WARN_ON_ONCE(hw_error_code & X86_PF_PK);
 
 #ifdef CONFIG_X86_32
 	/*
@@ -1204,7 +1213,7 @@  do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	 * Don't take the mm semaphore here. If we fixup a prefetch
 	 * fault we could otherwise deadlock:
 	 */
-	bad_area_nosemaphore(regs, hw_error_code, address);
+	bad_area_nosemaphore(regs, hw_error_code, address, irq_state);
 }
 NOKPROBE_SYMBOL(do_kern_addr_fault);
 
@@ -1245,7 +1254,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 		     !(hw_error_code & X86_PF_USER) &&
 		     !(regs->flags & X86_EFLAGS_AC)))
 	{
-		bad_area_nosemaphore(regs, hw_error_code, address);
+		bad_area_nosemaphore(regs, hw_error_code, address, NULL);
 		return;
 	}
 
@@ -1254,7 +1263,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 	 * in a region with pagefaults disabled then we must not take the fault
 	 */
 	if (unlikely(faulthandler_disabled() || !mm)) {
-		bad_area_nosemaphore(regs, hw_error_code, address);
+		bad_area_nosemaphore(regs, hw_error_code, address, NULL);
 		return;
 	}
 
@@ -1316,7 +1325,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 			 * Fault from code in kernel from
 			 * which we do not expect faults.
 			 */
-			bad_area_nosemaphore(regs, hw_error_code, address);
+			bad_area_nosemaphore(regs, hw_error_code, address, NULL);
 			return;
 		}
 retry:
@@ -1375,7 +1384,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
 			no_context(regs, hw_error_code, address, SIGBUS,
-				   BUS_ADRERR);
+				   BUS_ADRERR, NULL);
 		return;
 	}
 
@@ -1415,7 +1424,7 @@  trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
 
 static __always_inline void
 handle_page_fault(struct pt_regs *regs, unsigned long error_code,
-			      unsigned long address)
+		  unsigned long address, irqentry_state_t *irq_state)
 {
 	trace_page_fault_entries(regs, error_code, address);
 
@@ -1424,7 +1433,7 @@  handle_page_fault(struct pt_regs *regs, unsigned long error_code,
 
 	/* Was the fault on kernel-controlled part of the address space? */
 	if (unlikely(fault_in_kernel_space(address))) {
-		do_kern_addr_fault(regs, error_code, address);
+		do_kern_addr_fault(regs, error_code, address, irq_state);
 	} else {
 		do_user_addr_fault(regs, error_code, address);
 		/*
@@ -1479,7 +1488,7 @@  DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 	irqentry_enter(regs, &state);
 
 	instrumentation_begin();
-	handle_page_fault(regs, error_code, address);
+	handle_page_fault(regs, error_code, address, &state);
 	instrumentation_end();
 
 	irqentry_exit(regs, &state);