diff mbox series

[RFC,v9,11/13] xpfo, mm: optimize spinlock usage in xpfo_kunmap

Message ID 5bab13e12d4215112ad2180106cc6bb9b513754a.1554248002.git.khalid.aziz@oracle.com (mailing list archive)
State New, archived
Headers show
Series Add support for eXclusive Page Frame Ownership | expand

Commit Message

Khalid Aziz April 3, 2019, 5:34 p.m. UTC
From: Julian Stecklina <jsteckli@amazon.de>

Only the xpfo_kunmap call that actually unmaps the page needs to be
serialized. We must be careful to handle the case where, after the
atomic decrement of the mapcount, a concurrent xpfo_kmap has already
increased the mapcount again. In that case, we can safely skip
modifying the page table.

Model-checked with up to 4 concurrent callers with Spin.

Signed-off-by: Julian Stecklina <jsteckli@amazon.de>
Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Khalid Aziz <khalid@gonehiking.org>
Cc: x86@kernel.org
Cc: kernel-hardening@lists.openwall.com
Cc: Vasileios P. Kemerlis <vpk@cs.columbia.edu>
Cc: Juerg Haefliger <juerg.haefliger@canonical.com>
Cc: Tycho Andersen <tycho@tycho.ws>
Cc: Marco Benatto <marco.antonio.780@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/xpfo.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

Comments

Peter Zijlstra April 4, 2019, 7:56 a.m. UTC | #1
On Wed, Apr 03, 2019 at 11:34:12AM -0600, Khalid Aziz wrote:
> From: Julian Stecklina <jsteckli@amazon.de>
> 
> Only the xpfo_kunmap call that needs to actually unmap the page
> needs to be serialized. We need to be careful to handle the case,
> where after the atomic decrement of the mapcount, a xpfo_kmap
> increased the mapcount again. In this case, we can safely skip
> modifying the page table.
> 
> Model-checked with up to 4 concurrent callers with Spin.
> 
> Signed-off-by: Julian Stecklina <jsteckli@amazon.de>
> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
> Cc: Khalid Aziz <khalid@gonehiking.org>
> Cc: x86@kernel.org
> Cc: kernel-hardening@lists.openwall.com
> Cc: Vasileios P. Kemerlis <vpk@cs.columbia.edu>
> Cc: Juerg Haefliger <juerg.haefliger@canonical.com>
> Cc: Tycho Andersen <tycho@tycho.ws>
> Cc: Marco Benatto <marco.antonio.780@gmail.com>
> Cc: David Woodhouse <dwmw2@infradead.org>
> ---
>  include/linux/xpfo.h | 24 +++++++++++++++---------
>  1 file changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/xpfo.h b/include/linux/xpfo.h
> index 2318c7eb5fb7..37e7f52fa6ce 100644
> --- a/include/linux/xpfo.h
> +++ b/include/linux/xpfo.h
> @@ -61,6 +61,7 @@ static inline void xpfo_kmap(void *kaddr, struct page *page)
>  static inline void xpfo_kunmap(void *kaddr, struct page *page)
>  {
>  	unsigned long flags;
> +	bool flush_tlb = false;
>  
>  	if (!static_branch_unlikely(&xpfo_inited))
>  		return;
> @@ -72,18 +73,23 @@ static inline void xpfo_kunmap(void *kaddr, struct page *page)
>  	 * The page is to be allocated back to user space, so unmap it from
>  	 * the kernel, flush the TLB and tag it as a user page.
>  	 */
> -	spin_lock_irqsave(&page->xpfo_lock, flags);
> -
>  	if (atomic_dec_return(&page->xpfo_mapcount) == 0) {
> -#ifdef CONFIG_XPFO_DEBUG
> -		WARN_ON(PageXpfoUnmapped(page));
> -#endif
> -		SetPageXpfoUnmapped(page);
> -		set_kpte(kaddr, page, __pgprot(0));
> -		xpfo_flush_kernel_tlb(page, 0);
> +		spin_lock_irqsave(&page->xpfo_lock, flags);
> +
> +		/*
> +		 * In the case, where we raced with kmap after the
> +		 * atomic_dec_return, we must not nuke the mapping.
> +		 */
> +		if (atomic_read(&page->xpfo_mapcount) == 0) {
> +			SetPageXpfoUnmapped(page);
> +			set_kpte(kaddr, page, __pgprot(0));
> +			flush_tlb = true;
> +		}
> +		spin_unlock_irqrestore(&page->xpfo_lock, flags);
>  	}
>  
> -	spin_unlock_irqrestore(&page->xpfo_lock, flags);
> +	if (flush_tlb)
> +		xpfo_flush_kernel_tlb(page, 0);
>  }

This doesn't help with the TLB invalidation issue, AFAICT this is still
completely buggered. kunmap_atomic() can be called from IRQ context.

Khalid Aziz April 4, 2019, 4:06 p.m. UTC | #2
On 4/4/19 1:56 AM, Peter Zijlstra wrote:
> On Wed, Apr 03, 2019 at 11:34:12AM -0600, Khalid Aziz wrote:
>> From: Julian Stecklina <jsteckli@amazon.de>
>>
>> Only the xpfo_kunmap call that needs to actually unmap the page
>> needs to be serialized. We need to be careful to handle the case,
>> where after the atomic decrement of the mapcount, a xpfo_kmap
>> increased the mapcount again. In this case, we can safely skip
>> modifying the page table.
>>
>> Model-checked with up to 4 concurrent callers with Spin.
>>
>> Signed-off-by: Julian Stecklina <jsteckli@amazon.de>
>> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
>> Cc: Khalid Aziz <khalid@gonehiking.org>
>> Cc: x86@kernel.org
>> Cc: kernel-hardening@lists.openwall.com
>> Cc: Vasileios P. Kemerlis <vpk@cs.columbia.edu>
>> Cc: Juerg Haefliger <juerg.haefliger@canonical.com>
>> Cc: Tycho Andersen <tycho@tycho.ws>
>> Cc: Marco Benatto <marco.antonio.780@gmail.com>
>> Cc: David Woodhouse <dwmw2@infradead.org>
>> ---
>>  include/linux/xpfo.h | 24 +++++++++++++++---------
>>  1 file changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/xpfo.h b/include/linux/xpfo.h
>> index 2318c7eb5fb7..37e7f52fa6ce 100644
>> --- a/include/linux/xpfo.h
>> +++ b/include/linux/xpfo.h
>> @@ -61,6 +61,7 @@ static inline void xpfo_kmap(void *kaddr, struct page *page)
>>  static inline void xpfo_kunmap(void *kaddr, struct page *page)
>>  {
>>  	unsigned long flags;
>> +	bool flush_tlb = false;
>>  
>>  	if (!static_branch_unlikely(&xpfo_inited))
>>  		return;
>> @@ -72,18 +73,23 @@ static inline void xpfo_kunmap(void *kaddr, struct page *page)
>>  	 * The page is to be allocated back to user space, so unmap it from
>>  	 * the kernel, flush the TLB and tag it as a user page.
>>  	 */
>> -	spin_lock_irqsave(&page->xpfo_lock, flags);
>> -
>>  	if (atomic_dec_return(&page->xpfo_mapcount) == 0) {
>> -#ifdef CONFIG_XPFO_DEBUG
>> -		WARN_ON(PageXpfoUnmapped(page));
>> -#endif
>> -		SetPageXpfoUnmapped(page);
>> -		set_kpte(kaddr, page, __pgprot(0));
>> -		xpfo_flush_kernel_tlb(page, 0);
>> +		spin_lock_irqsave(&page->xpfo_lock, flags);
>> +
>> +		/*
>> +		 * In the case, where we raced with kmap after the
>> +		 * atomic_dec_return, we must not nuke the mapping.
>> +		 */
>> +		if (atomic_read(&page->xpfo_mapcount) == 0) {
>> +			SetPageXpfoUnmapped(page);
>> +			set_kpte(kaddr, page, __pgprot(0));
>> +			flush_tlb = true;
>> +		}
>> +		spin_unlock_irqrestore(&page->xpfo_lock, flags);
>>  	}
>>  
>> -	spin_unlock_irqrestore(&page->xpfo_lock, flags);
>> +	if (flush_tlb)
>> +		xpfo_flush_kernel_tlb(page, 0);
>>  }
> 
> This doesn't help with the TLB invalidation issue, AFAICT this is still
> completely buggered. kunmap_atomic() can be called from IRQ context.
> 

OK. xpfo_kmap/xpfo_kunmap need redesign.

--
Khalid

diff mbox series

Patch

diff --git a/include/linux/xpfo.h b/include/linux/xpfo.h
index 2318c7eb5fb7..37e7f52fa6ce 100644
--- a/include/linux/xpfo.h
+++ b/include/linux/xpfo.h
@@ -61,6 +61,7 @@  static inline void xpfo_kmap(void *kaddr, struct page *page)
 static inline void xpfo_kunmap(void *kaddr, struct page *page)
 {
 	unsigned long flags;
+	bool flush_tlb = false;
 
 	if (!static_branch_unlikely(&xpfo_inited))
 		return;
@@ -72,18 +73,23 @@  static inline void xpfo_kunmap(void *kaddr, struct page *page)
 	 * The page is to be allocated back to user space, so unmap it from
 	 * the kernel, flush the TLB and tag it as a user page.
 	 */
-	spin_lock_irqsave(&page->xpfo_lock, flags);
-
 	if (atomic_dec_return(&page->xpfo_mapcount) == 0) {
-#ifdef CONFIG_XPFO_DEBUG
-		WARN_ON(PageXpfoUnmapped(page));
-#endif
-		SetPageXpfoUnmapped(page);
-		set_kpte(kaddr, page, __pgprot(0));
-		xpfo_flush_kernel_tlb(page, 0);
+		spin_lock_irqsave(&page->xpfo_lock, flags);
+
+		/*
+		 * In the case, where we raced with kmap after the
+		 * atomic_dec_return, we must not nuke the mapping.
+		 */
+		if (atomic_read(&page->xpfo_mapcount) == 0) {
+			SetPageXpfoUnmapped(page);
+			set_kpte(kaddr, page, __pgprot(0));
+			flush_tlb = true;
+		}
+		spin_unlock_irqrestore(&page->xpfo_lock, flags);
 	}
 
-	spin_unlock_irqrestore(&page->xpfo_lock, flags);
+	if (flush_tlb)
+		xpfo_flush_kernel_tlb(page, 0);
 }
 
 void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp, bool will_map);