diff mbox

[v7,3/3] x86, mce: Add __mcsafe_copy()

Message ID 5b0243c5df825ad0841f4bb5584cd15d3f013f09.1451952351.git.tony.luck@intel.com (mailing list archive)
State Superseded
Headers show

Commit Message

Luck, Tony Dec. 31, 2015, 7:43 p.m. UTC
Make use of the EXTABLE_FAULT exception table entries. This routine
returns a structure to indicate the result of the copy:

struct mcsafe_ret {
	u64 trapnr;
	u64 remain;
};

If the copy is successful, then both 'trapnr' and 'remain' are zero.

If we faulted during the copy, then 'trapnr' will say which type
of trap (X86_TRAP_PF or X86_TRAP_MC) and 'remain' says how many
bytes were not copied.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/Kconfig                 |  10 +++
 arch/x86/include/asm/string_64.h |  10 +++
 arch/x86/kernel/x8664_ksyms_64.c |   4 ++
 arch/x86/lib/memcpy_64.S         | 136 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+)

Comments

Dan Williams Jan. 6, 2016, 4:42 a.m. UTC | #1
On Thu, Dec 31, 2015 at 11:43 AM, Tony Luck <tony.luck@intel.com> wrote:
> Make use of the EXTABLE_FAULT exception table entries. This routine
> returns a structure to indicate the result of the copy:
>
> struct mcsafe_ret {
>         u64 trapnr;
>         u64 remain;
> };
>
> If the copy is successful, then both 'trapnr' and 'remain' are zero.
>
> If we faulted during the copy, then 'trapnr' will say which type
> of trap (X86_TRAP_PF or X86_TRAP_MC) and 'remain' says how many
> bytes were not copied.
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/Kconfig                 |  10 +++
>  arch/x86/include/asm/string_64.h |  10 +++
>  arch/x86/kernel/x8664_ksyms_64.c |   4 ++
>  arch/x86/lib/memcpy_64.S         | 136 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 160 insertions(+)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 96d058a87100..42d26b4d1ec4 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1001,6 +1001,16 @@ config X86_MCE_INJECT
>           If you don't know what a machine check is and you don't do kernel
>           QA it is safe to say n.
>
> +config MCE_KERNEL_RECOVERY
> +       bool "Recovery from machine checks in special kernel memory copy functions"
> +       default n
> +       depends on X86_MCE && X86_64
> +       ---help---
> +         This option provides a new memory copy function mcsafe_memcpy()
> +         that is annotated to allow the machine check handler to return
> +         to an alternate code path to return an error to the caller instead
> +         of crashing the system. Say yes if you have a driver that uses this.
> +
>  config X86_THERMAL_VECTOR
>         def_bool y
>         depends on X86_MCE_INTEL
> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
> index ff8b9a17dc4b..16a8f0e56e4a 100644
> --- a/arch/x86/include/asm/string_64.h
> +++ b/arch/x86/include/asm/string_64.h
> @@ -78,6 +78,16 @@ int strcmp(const char *cs, const char *ct);
>  #define memset(s, c, n) __memset(s, c, n)
>  #endif
>
> +#ifdef CONFIG_MCE_KERNEL_RECOVERY
> +struct mcsafe_ret {
> +       u64 trapnr;
> +       u64 remain;
> +};

Can we move this definition outside of the CONFIG_MCE_KERNEL_RECOVERY
ifdef guard?  On a test integration branch the kbuild robot caught the
following:

   In file included from include/linux/pmem.h:21:0,
                    from drivers/acpi/nfit.c:22:
   arch/x86/include/asm/pmem.h: In function 'arch_memcpy_from_pmem':
>> arch/x86/include/asm/pmem.h:55:21: error: storage size of 'ret' isn't known
      struct mcsafe_ret ret;
                        ^
>> arch/x86/include/asm/pmem.h:57:9: error: implicit declaration of function '__mcsafe_copy' [-Werror=implicit-function-declaration]
      ret = __mcsafe_copy(dst, (void __force *) src, n);
            ^
>> arch/x86/include/asm/pmem.h:55:21: warning: unused variable 'ret' [-Wunused-variable]
      struct mcsafe_ret ret;
                        ^
   cc1: some warnings being treated as errors

vim +55 arch/x86/include/asm/pmem.h

    49  }
    50
    51  static inline int arch_memcpy_from_pmem(void *dst, const void
__pmem *src,
    52                  size_t n)
    53  {
    54          if (IS_ENABLED(CONFIG_MCE_KERNEL_RECOVERY)) {
  > 55                  struct mcsafe_ret ret;
    56
  > 57                  ret = __mcsafe_copy(dst, (void __force *) src, n);
    58                  if (ret.remain)
    59                          return -EIO;
    60                  return 0;
Luck, Tony Jan. 6, 2016, 7:06 a.m. UTC | #2
You were heading towards:

ld: undefined __mcsafe_copy

since that is also inside the #ifdef. 

Weren't you going to "select" this?

I'm seriously wondering whether the ifdef still makes sense. Now I don't have an extra exception table and routines to sort/search/fixup, it doesn't seem as useful as it was a few iterations ago.

Sent from my iPhone

> On Jan 5, 2016, at 20:43, Dan Williams <dan.j.williams@intel.com> wrote:
> 
>> On Thu, Dec 31, 2015 at 11:43 AM, Tony Luck <tony.luck@intel.com> wrote:
>> Make use of the EXTABLE_FAULT exception table entries. This routine
>> returns a structure to indicate the result of the copy:
>> 
>> struct mcsafe_ret {
>>        u64 trapnr;
>>        u64 remain;
>> };
>> 
>> If the copy is successful, then both 'trapnr' and 'remain' are zero.
>> 
>> If we faulted during the copy, then 'trapnr' will say which type
>> of trap (X86_TRAP_PF or X86_TRAP_MC) and 'remain' says how many
>> bytes were not copied.
>> 
>> Signed-off-by: Tony Luck <tony.luck@intel.com>
>> ---
>> arch/x86/Kconfig                 |  10 +++
>> arch/x86/include/asm/string_64.h |  10 +++
>> arch/x86/kernel/x8664_ksyms_64.c |   4 ++
>> arch/x86/lib/memcpy_64.S         | 136 +++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 160 insertions(+)
>> 
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 96d058a87100..42d26b4d1ec4 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -1001,6 +1001,16 @@ config X86_MCE_INJECT
>>          If you don't know what a machine check is and you don't do kernel
>>          QA it is safe to say n.
>> 
>> +config MCE_KERNEL_RECOVERY
>> +       bool "Recovery from machine checks in special kernel memory copy functions"
>> +       default n
>> +       depends on X86_MCE && X86_64
>> +       ---help---
>> +         This option provides a new memory copy function mcsafe_memcpy()
>> +         that is annotated to allow the machine check handler to return
>> +         to an alternate code path to return an error to the caller instead
>> +         of crashing the system. Say yes if you have a driver that uses this.
>> +
>> config X86_THERMAL_VECTOR
>>        def_bool y
>>        depends on X86_MCE_INTEL
>> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
>> index ff8b9a17dc4b..16a8f0e56e4a 100644
>> --- a/arch/x86/include/asm/string_64.h
>> +++ b/arch/x86/include/asm/string_64.h
>> @@ -78,6 +78,16 @@ int strcmp(const char *cs, const char *ct);
>> #define memset(s, c, n) __memset(s, c, n)
>> #endif
>> 
>> +#ifdef CONFIG_MCE_KERNEL_RECOVERY
>> +struct mcsafe_ret {
>> +       u64 trapnr;
>> +       u64 remain;
>> +};
> 
> Can we move this definition outside of the CONFIG_MCE_KERNEL_RECOVERY
> ifdef guard?  On a test integration branch the kbuild robot caught the
> following:
> 
>   In file included from include/linux/pmem.h:21:0,
>                    from drivers/acpi/nfit.c:22:
>   arch/x86/include/asm/pmem.h: In function 'arch_memcpy_from_pmem':
>>> arch/x86/include/asm/pmem.h:55:21: error: storage size of 'ret' isn't known
>      struct mcsafe_ret ret;
>                        ^
>>> arch/x86/include/asm/pmem.h:57:9: error: implicit declaration of function '__mcsafe_copy' [-Werror=implicit-function-declaration]
>      ret = __mcsafe_copy(dst, (void __force *) src, n);
>            ^
>>> arch/x86/include/asm/pmem.h:55:21: warning: unused variable 'ret' [-Wunused-variable]
>      struct mcsafe_ret ret;
>                        ^
>   cc1: some warnings being treated as errors
> 
> vim +55 arch/x86/include/asm/pmem.h
> 
>    49  }
>    50
>    51  static inline int arch_memcpy_from_pmem(void *dst, const void
> __pmem *src,
>    52                  size_t n)
>    53  {
>    54          if (IS_ENABLED(CONFIG_MCE_KERNEL_RECOVERY)) {
>> 55                  struct mcsafe_ret ret;
>    56
>> 57                  ret = __mcsafe_copy(dst, (void __force *) src, n);
>    58                  if (ret.remain)
>    59                          return -EIO;
>    60                  return 0;
Dan Williams Jan. 6, 2016, 7:11 a.m. UTC | #3
On Tue, Jan 5, 2016 at 11:06 PM, Luck, Tony <tony.luck@intel.com> wrote:
> You were heading towards:
>
> ld: undefined __mcsafe_copy

True, we'd also need a dummy mcsafe_copy() definition to compile it
out in the disabled case.

> since that is also inside the #ifdef.
>
> Weren't you going to "select" this?
>

I do select it, but by randconfig I still need to handle the
CONFIG_X86_MCE=n case.

> I'm seriously wondering whether the ifdef still makes sense. Now I don't have an extra exception table and routines to sort/search/fixup, it doesn't seem as useful as it was a few iterations ago.

Either way is ok with me.  That said, the extra definitions to allow
it to compile out when not enabled don't seem too onerous.
diff mbox

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96d058a87100..42d26b4d1ec4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1001,6 +1001,16 @@  config X86_MCE_INJECT
 	  If you don't know what a machine check is and you don't do kernel
 	  QA it is safe to say n.
 
+config MCE_KERNEL_RECOVERY
+	bool "Recovery from machine checks in special kernel memory copy functions"
+	default n
+	depends on X86_MCE && X86_64
+	---help---
+	  This option provides a new memory copy function __mcsafe_copy()
+	  that is annotated to allow the machine check handler to return
+	  to an alternate code path to return an error to the caller instead
+	  of crashing the system. Say yes if you have a driver that uses this.
+
 config X86_THERMAL_VECTOR
 	def_bool y
 	depends on X86_MCE_INTEL
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ff8b9a17dc4b..16a8f0e56e4a 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -78,6 +78,16 @@  int strcmp(const char *cs, const char *ct);
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+#ifdef CONFIG_MCE_KERNEL_RECOVERY
+struct mcsafe_ret {
+	u64 trapnr;	/* 0 on success, else X86_TRAP_PF or X86_TRAP_MC */
+	u64 remain;	/* bytes not copied; 0 on success */
+};
+
+struct mcsafe_ret __mcsafe_copy(void *dst, const void __user *src, size_t cnt);
+extern void __mcsafe_copy_end(void);	/* NOTE(review): no definition seen */
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a0695be19864..3d42d0ef3333 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -37,6 +37,10 @@  EXPORT_SYMBOL(__copy_user_nocache);
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
 
+#ifdef CONFIG_MCE_KERNEL_RECOVERY
+EXPORT_SYMBOL(__mcsafe_copy);	/* defined in arch/x86/lib/memcpy_64.S */
+#endif
+
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bba87de..e5b1acad8b1e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -177,3 +177,139 @@  ENTRY(memcpy_orig)
 .Lend:
 	retq
 ENDPROC(memcpy_orig)
+
+#ifdef CONFIG_MCE_KERNEL_RECOVERY
+/*
+ * __mcsafe_copy - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ *
+ * In:  %rdi = dst, %rsi = src, %edx = cnt
+ * Out: %rax = trapnr, %rdx = remain; both are zero when the copy succeeds.
+ *      On a fault %rax is set by fixup_exception() and %rdx is the number
+ *      of bytes that had not yet been stored to dst.
+ * Clobbers %rcx, %rsi, %rdi, %r8-%r11 and flags.
+ *
+ * Every load from src has an exception table entry. Its fixup rebuilds
+ * 'remain' from the loop counters live at the faulting instruction, and
+ * counts data as copied only once it has been stored to dst.
+ *
+ * NOTE(review): cnt is a size_t but only %edx is examined, so a count of
+ * 4GiB or more would be truncated - confirm callers never pass one.
+ */
+ENTRY(__mcsafe_copy)
+	cmpl $8,%edx
+	jb 20f		/* less than 8 bytes, go to byte copy loop */
+
+	/* check for bad alignment of source */
+	movl %esi,%ecx
+	andl $7,%ecx
+	jz 102f				/* already aligned */
+	subl $8,%ecx
+	negl %ecx			/* %ecx = bytes up to 8-byte alignment */
+	subl %ecx,%edx			/* %edx = bytes left after the head */
+	/* byte copy the head so that src becomes 8-byte aligned */
+0:	movb (%rsi),%al
+	movb %al,(%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz 0b
+102:
+	movl %edx,%ecx
+	andl $63,%edx			/* %edx = bytes after the 64-byte blocks */
+	shrl $6,%ecx			/* %ecx = number of 64-byte blocks */
+	jz 17f
+	/* 64 bytes per pass: four loads then four stores, done twice */
+1:	movq (%rsi),%r8
+2:	movq 1*8(%rsi),%r9
+3:	movq 2*8(%rsi),%r10
+4:	movq 3*8(%rsi),%r11
+	mov %r8,(%rdi)
+	mov %r9,1*8(%rdi)
+	mov %r10,2*8(%rdi)
+	mov %r11,3*8(%rdi)
+9:	movq 4*8(%rsi),%r8
+10:	movq 5*8(%rsi),%r9
+11:	movq 6*8(%rsi),%r10
+12:	movq 7*8(%rsi),%r11
+	mov %r8,4*8(%rdi)
+	mov %r9,5*8(%rdi)
+	mov %r10,6*8(%rdi)
+	mov %r11,7*8(%rdi)
+	leaq 64(%rsi),%rsi
+	leaq 64(%rdi),%rdi
+	decl %ecx
+	jnz 1b
+	/* then whole 8-byte words */
+17:	movl %edx,%ecx
+	andl $7,%edx			/* %edx = trailing bytes */
+	shrl $3,%ecx			/* %ecx = remaining 8-byte words */
+	jz 20f
+18:	movq (%rsi),%r8
+	mov %r8,(%rdi)
+	leaq 8(%rsi),%rsi
+	leaq 8(%rdi),%rdi
+	decl %ecx
+	jnz 18b
+	/* finally single bytes (the whole copy when cnt < 8) */
+20:	andl %edx,%edx
+	jz 23f
+	movl %edx,%ecx
+21:	movb (%rsi),%al
+	movb %al,(%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz 21b
+23:	xorq %rax, %rax
+	xorq %rdx, %rdx
+	sfence
+	/* copy successful. return 0 */
+	ret
+
+	.section .fixup,"ax"
+	/* fixups for machine check */
+	/* head loop: %ecx head bytes left, %edx everything after the head */
+30:
+	add %ecx,%edx
+	jmp 100f
+	/*
+	 * 64-byte loop: %ecx blocks left (including this one), %edx tail bytes.
+	 * The four quadwords read at 1-4 are stored only after all four loads
+	 * are done, so a fault on any of them leaves the whole block uncopied.
+	 */
+31:
+	shl $6,%ecx
+	add %ecx,%edx
+	jmp 100f
+	/* Likewise for 9-12: only the first 32 bytes of the block are in dst. */
+35:
+	shl $6,%ecx
+	lea -32(%ecx,%edx),%edx
+	jmp 100f
+39:	/* 8-byte loop: %ecx words left, %edx trailing bytes */
+	lea (%rdx,%rcx,8),%rdx
+	jmp 100f
+40:	/* tail byte loop: %ecx bytes left */
+	mov %ecx,%edx
+100:
+	sfence
+
+	/* %rax set the fault number in fixup_exception() */
+	ret
+	.previous
+
+	_ASM_EXTABLE_FAULT(0b,30b)
+	_ASM_EXTABLE_FAULT(1b,31b)
+	_ASM_EXTABLE_FAULT(2b,31b)
+	_ASM_EXTABLE_FAULT(3b,31b)
+	_ASM_EXTABLE_FAULT(4b,31b)
+	_ASM_EXTABLE_FAULT(9b,35b)
+	_ASM_EXTABLE_FAULT(10b,35b)
+	_ASM_EXTABLE_FAULT(11b,35b)
+	_ASM_EXTABLE_FAULT(12b,35b)
+	_ASM_EXTABLE_FAULT(18b,39b)
+	_ASM_EXTABLE_FAULT(21b,40b)
+ENDPROC(__mcsafe_copy)
+#endif