diff mbox series

[v5,1/7] mce: fix set_mce_nospec to always unmap the whole page

Message ID 20220128213150.1333552-2-jane.chu@oracle.com (mailing list archive)
State New, archived
Headers show
Series DAX poison recovery | expand

Commit Message

Jane Chu Jan. 28, 2022, 9:31 p.m. UTC
Mark poisoned page as not present, and to reverse the 'np' effect,
restate the _PAGE_PRESENT bit. Please refer to discussions here for
reason behind the decision.
https://lore.kernel.org/all/CAPcyv4hrXPb1tASBZUg-GgdVs0OOFKXMXLiHmktg_kFi7YBMyQ@mail.gmail.com/

Fixes: 284ce4011ba6 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()")
Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 arch/x86/include/asm/set_memory.h | 17 +++++------------
 arch/x86/kernel/cpu/mce/core.c    |  6 +++---
 arch/x86/mm/pat/set_memory.c      |  8 +++++++-
 include/linux/set_memory.h        |  2 +-
 4 files changed, 16 insertions(+), 17 deletions(-)

Comments

Christoph Hellwig Feb. 2, 2022, 1:21 p.m. UTC | #1
> +static inline int set_mce_nospec(unsigned long pfn)
>  {
>  	unsigned long decoy_addr;
>  	int rc;
> @@ -117,10 +113,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>  	 */
>  	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
>  
> -	if (unmap)
> -		rc = set_memory_np(decoy_addr, 1);
> -	else
> -		rc = set_memory_uc(decoy_addr, 1);
> +	rc = set_memory_np(decoy_addr, 1);
>  	if (rc)
>  		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
>  	return rc;
> @@ -130,7 +123,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>  /* Restore full speculative operation to the pfn. */
>  static inline int clear_mce_nospec(unsigned long pfn)
>  {
> -	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
> +	return _set_memory_present((unsigned long) pfn_to_kaddr(pfn), 1);
>  }

Wouldn't it make more sense to move these helpers out of line rather
than exporting _set_memory_present?

>  /*
> - * _set_memory_prot is an internal helper for callers that have been passed
> + * __set_memory_prot is an internal helper for callers that have been passed

This looks unrelated to the patch.
Jane Chu Feb. 2, 2022, 9:20 p.m. UTC | #2
On 2/2/2022 5:21 AM, Christoph Hellwig wrote:
>> +static inline int set_mce_nospec(unsigned long pfn)
>>   {
>>   	unsigned long decoy_addr;
>>   	int rc;
>> @@ -117,10 +113,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>>   	 */
>>   	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
>>   
>> -	if (unmap)
>> -		rc = set_memory_np(decoy_addr, 1);
>> -	else
>> -		rc = set_memory_uc(decoy_addr, 1);
>> +	rc = set_memory_np(decoy_addr, 1);
>>   	if (rc)
>>   		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
>>   	return rc;
>> @@ -130,7 +123,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>>   /* Restore full speculative operation to the pfn. */
>>   static inline int clear_mce_nospec(unsigned long pfn)
>>   {
>> -	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
>> +	return _set_memory_present((unsigned long) pfn_to_kaddr(pfn), 1);
>>   }
> 
> Wouldn't it make more sense to move these helpers out of line rather
> than exporting _set_memory_present?

Do you mean to move
   return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
into clear_mce_nospec() for the x86 arch and get rid of _set_memory_present?
If so, sure I'll do that.

> 
>>   /*
>> - * _set_memory_prot is an internal helper for callers that have been passed
>> + * __set_memory_prot is an internal helper for callers that have been passed
> 
> This looks unrelated to the patch.

My bad, will remove the remnant.

thanks!
-jane
Jane Chu Feb. 2, 2022, 11:07 p.m. UTC | #3
On 2/2/2022 1:20 PM, Jane Chu wrote:
> On 2/2/2022 5:21 AM, Christoph Hellwig wrote:
>>> +static inline int set_mce_nospec(unsigned long pfn)
>>>    {
>>>    	unsigned long decoy_addr;
>>>    	int rc;
>>> @@ -117,10 +113,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>>>    	 */
>>>    	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
>>>    
>>> -	if (unmap)
>>> -		rc = set_memory_np(decoy_addr, 1);
>>> -	else
>>> -		rc = set_memory_uc(decoy_addr, 1);
>>> +	rc = set_memory_np(decoy_addr, 1);
>>>    	if (rc)
>>>    		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
>>>    	return rc;
>>> @@ -130,7 +123,7 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
>>>    /* Restore full speculative operation to the pfn. */
>>>    static inline int clear_mce_nospec(unsigned long pfn)
>>>    {
>>> -	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
>>> +	return _set_memory_present((unsigned long) pfn_to_kaddr(pfn), 1);
>>>    }
>>
>> Wouldn't it make more sense to move these helpers out of line rather
>> than exporting _set_memory_present?
> 
> Do you mean to move
>     return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
> into clear_mce_nospec() for the x86 arch and get rid of _set_memory_present?
> If so, sure I'll do that.

Looks like I can't do that.  It's either exporting 
_set_memory_present(), or exporting change_page_attr_set().  Perhaps the 
former is more conventional?

-jane
Christoph Hellwig Feb. 3, 2022, 1:42 p.m. UTC | #4
On Wed, Feb 02, 2022 at 11:07:37PM +0000, Jane Chu wrote:
> On 2/2/2022 1:20 PM, Jane Chu wrote:
> >> Wouldn't it make more sense to move these helpers out of line rather
> >> than exporting _set_memory_present?
> > 
> > Do you mean to move
> >     return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
> > into clear_mce_nospec() for the x86 arch and get rid of _set_memory_present?
> > If so, sure I'll do that.
> 
> Looks like I can't do that.  It's either exporting 
> _set_memory_present(), or exporting change_page_attr_set().  Perhaps the 
> former is more conventional?

These helpers above means set_mce_nospec and clear_mce_nospec.  If they
are moved to normal functions instead of inlines, there is no need to
export the internals at all.
Dan Williams Feb. 4, 2022, 5:23 a.m. UTC | #5
On Thu, Feb 3, 2022 at 5:42 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Feb 02, 2022 at 11:07:37PM +0000, Jane Chu wrote:
> > On 2/2/2022 1:20 PM, Jane Chu wrote:
> > >> Wouldn't it make more sense to move these helpers out of line rather
> > >> than exporting _set_memory_present?
> > >
> > > Do you mean to move
> > >     return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
> > > into clear_mce_nospec() for the x86 arch and get rid of _set_memory_present?
> > > If so, sure I'll do that.
> >
> > Looks like I can't do that.  It's either exporting
> > _set_memory_present(), or exporting change_page_attr_set().  Perhaps the
> > former is more conventional?
>
> These helpers above means set_mce_nospec and clear_mce_nospec.  If they
> are moved to normal functions instead of inlines, there is no need to
> export the internals at all.

Agree, {set,clear}_mce_nospec() can just move to arch/x86/mm/pat/set_memory.c.
Jane Chu Feb. 6, 2022, 8:30 a.m. UTC | #6
On 2/3/2022 9:23 PM, Dan Williams wrote:
> On Thu, Feb 3, 2022 at 5:42 AM Christoph Hellwig <hch@infradead.org> wrote:
>>
>> On Wed, Feb 02, 2022 at 11:07:37PM +0000, Jane Chu wrote:
>>> On 2/2/2022 1:20 PM, Jane Chu wrote:
>>>>> Wouldn't it make more sense to move these helpers out of line rather
>>>>> than exporting _set_memory_present?
>>>>
>>>> Do you mean to move
>>>>      return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
>>>> into clear_mce_nospec() for the x86 arch and get rid of _set_memory_present?
>>>> If so, sure I'll do that.
>>>
>>> Looks like I can't do that.  It's either exporting
>>> _set_memory_present(), or exporting change_page_attr_set().  Perhaps the
>>> former is more conventional?
>>
>> These helpers above means set_mce_nospec and clear_mce_nospec.  If they
>> are moved to normal functions instead of inlines, there is no need to
>> export the internals at all.
> 
> Agree, {set,clear}_mce_nospec() can just move to arch/x86/mm/pat/set_memory.c.

Got it, will do.

thanks!
-jane
diff mbox series

Patch

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a..aef6677da291 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -50,6 +50,7 @@  int set_memory_decrypted(unsigned long addr, int numpages);
 int set_memory_np_noalias(unsigned long addr, int numpages);
 int set_memory_nonglobal(unsigned long addr, int numpages);
 int set_memory_global(unsigned long addr, int numpages);
+int _set_memory_present(unsigned long addr, int numpages);
 
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
@@ -89,13 +90,8 @@  void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 extern int kernel_set_to_readonly;
 
 #ifdef CONFIG_X86_64
-/*
- * Prevent speculative access to the page by either unmapping
- * it (if we do not require access to any part of the page) or
- * marking it uncacheable (if we want to try to retrieve data
- * from non-poisoned lines in the page).
- */
-static inline int set_mce_nospec(unsigned long pfn, bool unmap)
+/* Prevent speculative access to a page by marking it not-present */
+static inline int set_mce_nospec(unsigned long pfn)
 {
 	unsigned long decoy_addr;
 	int rc;
@@ -117,10 +113,7 @@  static inline int set_mce_nospec(unsigned long pfn, bool unmap)
 	 */
 	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
 
-	if (unmap)
-		rc = set_memory_np(decoy_addr, 1);
-	else
-		rc = set_memory_uc(decoy_addr, 1);
+	rc = set_memory_np(decoy_addr, 1);
 	if (rc)
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
 	return rc;
@@ -130,7 +123,7 @@  static inline int set_mce_nospec(unsigned long pfn, bool unmap)
 /* Restore full speculative operation to the pfn. */
 static inline int clear_mce_nospec(unsigned long pfn)
 {
-	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+	return _set_memory_present((unsigned long) pfn_to_kaddr(pfn), 1);
 }
 #define clear_mce_nospec clear_mce_nospec
 #else
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..8d12739f283d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -613,7 +613,7 @@  static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 
 	pfn = mce->addr >> PAGE_SHIFT;
 	if (!memory_failure(pfn, 0)) {
-		set_mce_nospec(pfn, whole_page(mce));
+		set_mce_nospec(pfn);
 		mce->kflags |= MCE_HANDLED_UC;
 	}
 
@@ -1297,7 +1297,7 @@  static void kill_me_maybe(struct callback_head *cb)
 
 	ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
 	if (!ret) {
-		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+		set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
 		sync_core();
 		return;
 	}
@@ -1321,7 +1321,7 @@  static void kill_me_never(struct callback_head *cb)
 	p->mce_count = 0;
 	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
 	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
-		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+		set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
 }
 
 static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef..68d84c8bd977 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1816,7 +1816,7 @@  static inline int cpa_clear_pages_array(struct page **pages, int numpages,
 }
 
 /*
- * _set_memory_prot is an internal helper for callers that have been passed
+ * __set_memory_prot is an internal helper for callers that have been passed
  * a pgprot_t value from upper layers and a reservation has already been taken.
  * If you want to set the pgprot to a specific page protocol, use the
  * set_memory_xx() functions.
@@ -1983,6 +1983,12 @@  int set_memory_global(unsigned long addr, int numpages)
 				    __pgprot(_PAGE_GLOBAL), 0);
 }
 
+int _set_memory_present(unsigned long addr, int numpages)
+{
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+EXPORT_SYMBOL_GPL(_set_memory_present);
+
 /*
  * __set_memory_enc_pgtable() is used for the hypervisors that get
  * informed about "encryption" status via page tables.
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index f36be5166c19..9ad898d40e7e 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -43,7 +43,7 @@  static inline bool can_set_direct_map(void)
 #endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
 
 #ifndef set_mce_nospec
-static inline int set_mce_nospec(unsigned long pfn, bool unmap)
+static inline int set_mce_nospec(unsigned long pfn)
 {
 	return 0;
 }