Message ID | 20211018202542.584115-6-tony.luck@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Basic recovery for machine checks inside SGX | expand |
On Mon, Oct 18, 2021 at 01:25:40PM -0700, Tony Luck wrote:
> Add a call inside memory_failure() to call the arch specific code
> to check if the address is an SGX EPC page and handle it.
>
> Note the SGX EPC pages do not have a "struct page" entry, so the hook
> goes in at the same point as the device mapping hook.
>
> Pull the call to acquire the mutex earlier so the SGX errors are also
> protected.
>
> Make set_mce_nospec() skip SGX pages when trying to adjust
> the 1:1 map.
>
> Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
> Tested-by: Reinette Chatre <reinette.chatre@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
...
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 73a52aba448f..62b199ed5ec6 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3284,5 +3284,19 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
>      return 0;
>  }
>
> +#ifndef arch_memory_failure
> +static inline int arch_memory_failure(unsigned long pfn, int flags)
> +{
> +    return -ENXIO;
> +}
> +#endif
> +
> +#ifndef arch_is_platform_page
> +static inline bool arch_is_platform_page(u64 paddr)
> +{
> +    return false;
> +}
> +#endif
> +

How about putting these definitions near the other related functions
in the same file (like below)?

  ...
  extern void shake_page(struct page *p);
  extern atomic_long_t num_poisoned_pages __read_mostly;
  extern int soft_offline_page(unsigned long pfn, int flags);

  // here?

  /*
   * Error handlers for various types of pages.
   */
  enum mf_result {

Otherwise, the patch looks good to me.

Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Thanks,
Naoya Horiguchi
> How about putting these definitions near the other related functions
> in the same file (like below)?
>
>   ...
>   extern void shake_page(struct page *p);
>   extern atomic_long_t num_poisoned_pages __read_mostly;
>   extern int soft_offline_page(unsigned long pfn, int flags);
>
>   // here?

Makes sense to group together with these other RAS bits. I'll move
the definitions here.

> Otherwise, the patch looks good to me.
>
> Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Thanks for the review!

-Tony
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9ad2acaaae9b..4865f2860a4f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -853,4 +853,12 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 43fa081a1adb..ce8dd215f5b3 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SET_MEMORY_H #define _ASM_X86_SET_MEMORY_H +#include <linux/mm.h> #include <asm/page.h> #include <asm-generic/set_memory.h> @@ -98,6 +99,9 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap) unsigned long decoy_addr; int rc; + /* SGX pages are not in the 1:1 map */ + if (arch_is_platform_page(pfn << PAGE_SHIFT)) + return 0; /* * We would like to just call: * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f..62b199ed5ec6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3284,5 +3284,19 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) return 0; } +#ifndef arch_memory_failure +static inline int arch_memory_failure(unsigned long pfn, int flags) +{ + return -ENXIO; +} +#endif + +#ifndef arch_is_platform_page +static inline bool arch_is_platform_page(u64 paddr) +{ + return false; +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3e6449f2102a..b1cbf9845c19 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1632,21 +1632,28 @@ int memory_failure(unsigned long pfn, int flags) if 
(!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); + mutex_lock(&mf_mutex); + p = pfn_to_online_page(pfn); if (!p) { + res = arch_memory_failure(pfn, flags); + if (res == 0) + goto unlock_mutex; + if (pfn_valid(pfn)) { pgmap = get_dev_pagemap(pfn, NULL); - if (pgmap) - return memory_failure_dev_pagemap(pfn, flags, - pgmap); + if (pgmap) { + res = memory_failure_dev_pagemap(pfn, flags, + pgmap); + goto unlock_mutex; + } } pr_err("Memory failure: %#lx: memory outside kernel control\n", pfn); - return -ENXIO; + res = -ENXIO; + goto unlock_mutex; } - mutex_lock(&mf_mutex); - try_again: if (PageHuge(p)) { res = memory_failure_hugetlb(pfn, flags);