diff mbox series

[v5,09/18] x86/split_lock: Handle #AC exception for split lock

Message ID 1552431636-31511-10-git-send-email-fenghua.yu@intel.com (mailing list archive)
State Not Applicable
Delegated to: Johannes Berg
Headers show
Series x86/split_lock: Enable #AC exception for split locked accesses | expand

Commit Message

Fenghua Yu March 12, 2019, 11 p.m. UTC
There may be different considerations on how to handle #AC for split lock,
e.g. how to handle system hang caused by split lock issue in firmware,
how to emulate faulting instruction, etc. We use a simple method to
handle user and kernel split lock and may extend the method in the future.

When #AC exception for split lock is triggered from user process, the
process is killed by SIGBUS. To execute the process properly, user
application developer needs to fix the split lock issue.

When #AC exception for split lock is triggered from a kernel instruction,
disable #AC for split lock on local CPU and warn the split lock issue.
After the exception, the faulting instruction will be executed and kernel
execution continues. #AC for split lock is only disabled on the local CPU
not globally. It will be re-enabled if the CPU is offline and then online.

Kernel developer should check the warning, which contains helpful faulting
address, context, and callstack info, and fix the split lock issue
one by one. Then further split lock may be captured and fixed.

After bit 29 in MSR_TEST_CTL is set as one in kernel, firmware inherits
the setting when firmware is executed in S4, S5, run time services, SMI,
etc. Split lock issue in firmware triggers #AC and may hang the system
depending on how firmware handles the #AC. It's up to firmware developer
to fix the split lock issues in firmware.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
---
 arch/x86/kernel/traps.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

Comments

Dave Hansen March 12, 2019, 11:51 p.m. UTC | #1
On 3/12/19 4:00 PM, Fenghua Yu wrote:
> +#ifndef CONFIG_CPU_SUP_INTEL
>  DO_ERROR(X86_TRAP_AC,     SIGBUS,  BUS_ADRALN, NULL, "alignment check",     alignment_check)
> +#else
> +dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
> +{
> +	unsigned int trapnr = X86_TRAP_AC;
> +	char str[] = "alignment check";
> +	int signr = SIGBUS;
> +
> +	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
> +
> +	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
> +			NOTIFY_STOP) {
> +		cond_local_irq_enable(regs);
> +		if (!user_mode(regs)) {
> +			/*
> +			 * Only split lock can generate #AC from kernel. Warn
> +			 * and disable #AC for split lock on current CPU.
> +			 */
> +			msr_clear_bit(MSR_TEST_CTL,
> +				      TEST_CTL_ENABLE_SPLIT_LOCK_DETECT_SHIFT)

I don't see any feature checking here.  Don't we need to see if this MSR
is supported?

Shouldn't the code here on systems that don't support split lock
disabling be the same as on CONFIG_CPU_SUP_INTEL=n systems?

> +			WARN_ONCE(1, "A split lock issue is detected.\n");
> +
> +			return;
> +		}
> +		/* Handle #AC generated from user code. */
> +		do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
> +			error_code, BUS_ADRALN, NULL);
> +	}
> +}
> +#endif

So...  Do we really need an Intel-specific #ifdef for this sucker?
Fenghua Yu March 13, 2019, 12:49 a.m. UTC | #2
On Tue, Mar 12, 2019 at 04:51:22PM -0700, Dave Hansen wrote:
> On 3/12/19 4:00 PM, Fenghua Yu wrote:
> I don't see any feature checking here.  Don't we need to see if this MSR
> is supported?
> 
> Shouldn't the code here on systems that don't support split lock
> disabling be the same as on CONFIG_CPU_SUP_INTEL=n systems?

You are right. Is the following #AC handler code better?

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d26f9e9c3d83..5296021509c7 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -61,6 +61,7 @@
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -293,7 +294,37 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,           0, NULL, "coprocessor segment overru
 DO_ERROR(X86_TRAP_TS,     SIGSEGV,          0, NULL, "invalid TSS",         invalid_TSS)
 DO_ERROR(X86_TRAP_NP,     SIGBUS,           0, NULL, "segment not present", segment_not_present)
 DO_ERROR(X86_TRAP_SS,     SIGBUS,           0, NULL, "stack segment",       stack_segment)
-DO_ERROR(X86_TRAP_AC,     SIGBUS,  BUS_ADRALN, NULL, "alignment check",     alignment_check)
+dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
+{
+	unsigned int trapnr = X86_TRAP_AC;
+	char str[] = "alignment check";
+	int signr = SIGBUS;
+
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		cond_local_irq_enable(regs);
+		if (!user_mode(regs)) {
+			if (!this_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+				return;
+
+			/*
+			 * Only split lock can generate #AC from kernel. Warn
+			 * and disable #AC for split lock on current CPU.
+			 */
+			msr_clear_bit(MSR_TEST_CTL,
+				      TEST_CTL_ENABLE_SPLIT_LOCK_DETECT_SHIFT);
+			WARN_ONCE(1, "A split lock issue is detected.\n");
+
+
+			return;
+		}
+		/* Handle #AC generated from user code. */
+		do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
+			error_code, BUS_ADRALN, NULL);
+	}
+}
 #undef IP
 
 #ifdef CONFIG_VMAP_STACK
Dave Hansen March 13, 2019, 4:22 p.m. UTC | #3
On 3/12/19 5:49 PM, Fenghua Yu wrote:
> On Tue, Mar 12, 2019 at 04:51:22PM -0700, Dave Hansen wrote:
>> On 3/12/19 4:00 PM, Fenghua Yu wrote:
>> I don't see any feature checking here.  Don't we need to see if this MSR
>> is supported?
>>
>> Shouldn't the code here on systems that don't support split lock
>> disabling be the same as on CONFIG_CPU_SUP_INTEL=n systems?
> 
> You are right. Is the following #AC handler code better?

Fenghua, I'd really appreciate if you could take a deep breath and slow
down.  The most important thing is getting the right patch out and being
as respectful as possible with reviewer bandwidth.

> @@ -293,7 +294,37 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,           0, NULL, "coprocessor segment overru
>  DO_ERROR(X86_TRAP_TS,     SIGSEGV,          0, NULL, "invalid TSS",         invalid_TSS)
>  DO_ERROR(X86_TRAP_NP,     SIGBUS,           0, NULL, "segment not present", segment_not_present)
>  DO_ERROR(X86_TRAP_SS,     SIGBUS,           0, NULL, "stack segment",       stack_segment)
> -DO_ERROR(X86_TRAP_AC,     SIGBUS,  BUS_ADRALN, NULL, "alignment check",     alignment_check)
> +dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)

Is this really an appropriate place to stick this function?  Without any
whitespace, and even pushing out the "#undef" that was here before?

> +{
> +	unsigned int trapnr = X86_TRAP_AC;
> +	char str[] = "alignment check";
> +	int signr = SIGBUS;
> +
> +	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
> +
> +	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
> +			NOTIFY_STOP) {

Please unindent this code block.

> +		cond_local_irq_enable(regs);
> +		if (!user_mode(regs)) {

Comments please.

The comment about #AC being impossible in the kernel without the split
lock detection feature belongs here, not below.

> +			if (!this_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
> +				return;

Is this consistent with the code that was here before?  Basically, if we
are in the kernel, get an #AC and end up here, we just return from this
function?  Is that what DO_ERROR() did?

> +			/*
> +			 * Only split lock can generate #AC from kernel. Warn
> +			 * and disable #AC for split lock on current CPU.
> +			 */
> +			msr_clear_bit(MSR_TEST_CTL,
> +				      TEST_CTL_ENABLE_SPLIT_LOCK_DETECT_SHIFT);
> +			WARN_ONCE(1, "A split lock issue is detected.\n");

Is it an issue?  I'd probably say: "split lock operation detected"

> +
> +
> +			return;

Extra whitespace.

> +		}
> +		/* Handle #AC generated from user code. */
> +		do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
> +			error_code, BUS_ADRALN, NULL);
> +	}
> +}
>  #undef IP
diff mbox series

Patch

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d26f9e9c3d83..69b6233e783e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -61,6 +61,7 @@ 
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -293,7 +294,37 @@  DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,           0, NULL, "coprocessor segment overru
 DO_ERROR(X86_TRAP_TS,     SIGSEGV,          0, NULL, "invalid TSS",         invalid_TSS)
 DO_ERROR(X86_TRAP_NP,     SIGBUS,           0, NULL, "segment not present", segment_not_present)
 DO_ERROR(X86_TRAP_SS,     SIGBUS,           0, NULL, "stack segment",       stack_segment)
+#ifndef CONFIG_CPU_SUP_INTEL
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  BUS_ADRALN, NULL, "alignment check",     alignment_check)
+#else
+dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
+{
+	unsigned int trapnr = X86_TRAP_AC;
+	char str[] = "alignment check";
+	int signr = SIGBUS;
+
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
+			NOTIFY_STOP) {
+		cond_local_irq_enable(regs);
+		if (!user_mode(regs)) {
+			/*
+			 * Only split lock can generate #AC from kernel. Warn
+			 * and disable #AC for split lock on current CPU.
+			 */
+			msr_clear_bit(MSR_TEST_CTL,
+				      TEST_CTL_ENABLE_SPLIT_LOCK_DETECT_SHIFT);
+			WARN_ONCE(1, "A split lock issue is detected.\n");
+
+			return;
+		}
+		/* Handle #AC generated from user code. */
+		do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
+			error_code, BUS_ADRALN, NULL);
+	}
+}
+#endif
 #undef IP
 
 #ifdef CONFIG_VMAP_STACK