diff mbox

[v3,2/5] x86/msr: Carry on after a non-"safe" MSR access fails if !panic_on_oops

Message ID 35f2f107e0d85473a0e66c08f93d571a9c72b7fc.1457723023.git.luto@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andy Lutomirski March 11, 2016, 7:06 p.m. UTC
This demotes an OOPS and likely panic due to a failed non-"safe" MSR
access to a WARN and, for RDMSR, a return value of zero.  If
panic_on_oops is set, then failed unsafe MSR accesses will still
oops and panic.

To be clear, this type of failure should *not* happen.  This patch
exists to minimize the chance of nasty undebuggable failures on
systems that used to work due to a now-fixed CONFIG_PARAVIRT=y bug.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/msr.h | 10 ++++++++--
 arch/x86/mm/extable.c      | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

Comments

Ingo Molnar March 12, 2016, 3:31 p.m. UTC | #1
* Andy Lutomirski <luto@kernel.org> wrote:

> +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
> +			     struct pt_regs *regs, int trapnr)
> +{
> +	WARN(1, "unsafe MSR access error: RDMSR from 0x%x",
> +	     (unsigned int)regs->cx);

Please make this WARN_ONCE(). There's no point in locking up the system with 
WARN() spam, should this trigger frequently.

> +	WARN(1, "unsafe MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
> +	     (unsigned int)regs->cx,
> +	     (unsigned int)regs->dx, (unsigned int)regs->ax);

Ditto.

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ingo Molnar March 12, 2016, 3:36 p.m. UTC | #2
* Andy Lutomirski <luto@kernel.org> wrote:

> This demotes an OOPS and likely panic due to a failed non-"safe" MSR
> access to a WARN and, for RDMSR, a return value of zero.  If
> panic_on_oops is set, then failed unsafe MSR accesses will still
> oops and panic.
> 
> To be clear, this type of failure should *not* happen.  This patch
> exists to minimize the chance of nasty undebuggable failures on
> systems that used to work due to a now-fixed CONFIG_PARAVIRT=y bug.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/include/asm/msr.h | 10 ++++++++--
>  arch/x86/mm/extable.c      | 33 +++++++++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
> index 93fb7c1cffda..1487054a1a70 100644
> --- a/arch/x86/include/asm/msr.h
> +++ b/arch/x86/include/asm/msr.h
> @@ -92,7 +92,10 @@ static inline unsigned long long native_read_msr(unsigned int msr)
>  {
>  	DECLARE_ARGS(val, low, high);
>  
> -	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
> +	asm volatile("1: rdmsr\n"
> +		     "2:\n"
> +		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
> +		     : EAX_EDX_RET(val, low, high) : "c" (msr));
>  	if (msr_tracepoint_active(__tracepoint_read_msr))
>  		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
>  	return EAX_EDX_VAL(val, low, high);
> @@ -119,7 +122,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
>  static inline void native_write_msr(unsigned int msr,
>  				    unsigned low, unsigned high)
>  {
> -	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
> +	asm volatile("1: wrmsr\n"
> +		     "2:\n"
> +		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
> +		     : : "c" (msr), "a"(low), "d" (high) : "memory");
>  	if (msr_tracepoint_active(__tracepoint_read_msr))
>  		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
>  }
> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
> index 9dd7e4b7fcde..f310714e6e6d 100644
> --- a/arch/x86/mm/extable.c
> +++ b/arch/x86/mm/extable.c
> @@ -49,6 +49,39 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
>  }
>  EXPORT_SYMBOL(ex_handler_ext);
>  
> +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
> +			     struct pt_regs *regs, int trapnr)
> +{
> +	WARN(1, "unsafe MSR access error: RDMSR from 0x%x",
> +	     (unsigned int)regs->cx);

Btw., instead of the safe/unsafe naming (which carries emotional and security
connotations), shouldn't we move this over to the _check() (or _checking())
naming that we already use in other places in the kernel?

I.e.:

	rdmsr(msr, l, h);

and:

	if (rdmsr_check(msr, l, h)) {
		...
	}

and then we could name the helpers as _check() and _nocheck() - which is neutral 
naming.

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Lutomirski March 12, 2016, 5:32 p.m. UTC | #3
On Sat, Mar 12, 2016 at 7:36 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@kernel.org> wrote:
>
>> This demotes an OOPS and likely panic due to a failed non-"safe" MSR
>> access to a WARN and, for RDMSR, a return value of zero.  If
>> panic_on_oops is set, then failed unsafe MSR accesses will still
>> oops and panic.
>>
>> To be clear, this type of failure should *not* happen.  This patch
>> exists to minimize the chance of nasty undebuggable failures on
>> systems that used to work due to a now-fixed CONFIG_PARAVIRT=y bug.
>>
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>> ---
>>  arch/x86/include/asm/msr.h | 10 ++++++++--
>>  arch/x86/mm/extable.c      | 33 +++++++++++++++++++++++++++++++++
>>  2 files changed, 41 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
>> index 93fb7c1cffda..1487054a1a70 100644
>> --- a/arch/x86/include/asm/msr.h
>> +++ b/arch/x86/include/asm/msr.h
>> @@ -92,7 +92,10 @@ static inline unsigned long long native_read_msr(unsigned int msr)
>>  {
>>       DECLARE_ARGS(val, low, high);
>>
>> -     asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
>> +     asm volatile("1: rdmsr\n"
>> +                  "2:\n"
>> +                  _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
>> +                  : EAX_EDX_RET(val, low, high) : "c" (msr));
>>       if (msr_tracepoint_active(__tracepoint_read_msr))
>>               do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
>>       return EAX_EDX_VAL(val, low, high);
>> @@ -119,7 +122,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
>>  static inline void native_write_msr(unsigned int msr,
>>                                   unsigned low, unsigned high)
>>  {
>> -     asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
>> +     asm volatile("1: wrmsr\n"
>> +                  "2:\n"
>> +                  _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
>> +                  : : "c" (msr), "a"(low), "d" (high) : "memory");
>>       if (msr_tracepoint_active(__tracepoint_read_msr))
>>               do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
>>  }
>> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
>> index 9dd7e4b7fcde..f310714e6e6d 100644
>> --- a/arch/x86/mm/extable.c
>> +++ b/arch/x86/mm/extable.c
>> @@ -49,6 +49,39 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
>>  }
>>  EXPORT_SYMBOL(ex_handler_ext);
>>
>> +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
>> +                          struct pt_regs *regs, int trapnr)
>> +{
>> +     WARN(1, "unsafe MSR access error: RDMSR from 0x%x",
>> +          (unsigned int)regs->cx);
>
> Btw., instead of the safe/unsafe naming (which has an emotional and security
> secondary attribute), shouldn't we move this over to a _check() (or _checking())
> naming instead that we do in other places in the kernel?
>
> I.e.:
>
>         rdmsr(msr, l, h);
>
> and:
>
>         if (rdmsr_check(msr, l, h)) {
>                 ...
>         }
>
> and then we could name the helpers as _check() and _nocheck() - which is neutral
> naming.

Will do as a separate followup series.

At least with this series applied, the functions named _safe all point
to each other correctly.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 93fb7c1cffda..1487054a1a70 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -92,7 +92,10 @@  static inline unsigned long long native_read_msr(unsigned int msr)
 {
 	DECLARE_ARGS(val, low, high);
 
-	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+	asm volatile("1: rdmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+		     : EAX_EDX_RET(val, low, high) : "c" (msr));
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
 	return EAX_EDX_VAL(val, low, high);
@@ -119,7 +122,10 @@  static inline unsigned long long native_read_msr_safe(unsigned int msr,
 static inline void native_write_msr(unsigned int msr,
 				    unsigned low, unsigned high)
 {
-	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+	asm volatile("1: wrmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 9dd7e4b7fcde..f310714e6e6d 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -49,6 +49,39 @@  bool ex_handler_ext(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_ext);
 
+bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, int trapnr)
+{
+	WARN(1, "unsafe MSR access error: RDMSR from 0x%x",
+	     (unsigned int)regs->cx);
+
+	/* If panic_on_oops is set, don't try to recover. */
+	if (panic_on_oops)
+		return false;
+
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = 0;
+	regs->dx = 0;
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+
+bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, int trapnr)
+{
+	WARN(1, "unsafe MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
+	     (unsigned int)regs->cx,
+	     (unsigned int)regs->dx, (unsigned int)regs->ax);
+
+	/* If panic_on_oops is set, don't try to recover. */
+	if (panic_on_oops)
+		return false;
+
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
 bool ex_has_fault_handler(unsigned long ip)
 {
 	const struct exception_table_entry *e;