diff mbox

[v11,2/12] x86, mm, pat: Refactor !pat_enabled handling

Message ID 1432940350-1802-3-git-send-email-toshi.kani@hp.com
State Superseded
Headers show

Commit Message

Toshi Kani May 29, 2015, 10:59 p.m. UTC
From: Toshi Kani <toshi.kani@hp.com>

This patch refactors the !pat_enabled code paths and integrates
them into the PAT abstraction code.  The PAT table is emulated by
corresponding to the two cache attribute bits, PWT (Write Through)
and PCD (Cache Disable).  The emulated PAT table is the same as the
BIOS default setup when the system has PAT but the "nopat" boot
option is specified.  The emulated PAT table is also used when
MSR_IA32_CR_PAT returns 0 -- 9d34cfdf4796 ("x86: Don't rely on
VMWare emulating PAT MSR correctly").

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/mm/init.c     |    6 ++-
 arch/x86/mm/iomap_32.c |   12 +++---
 arch/x86/mm/ioremap.c  |    5 +--
 arch/x86/mm/pageattr.c |    3 --
 arch/x86/mm/pat.c      |   95 +++++++++++++++++++++++++++++-------------------
 5 files changed, 67 insertions(+), 54 deletions(-)

Comments

Borislav Petkov May 31, 2015, 9:46 a.m. UTC | #1
On Fri, May 29, 2015 at 04:59:00PM -0600, Toshi Kani wrote:
> From: Toshi Kani <toshi.kani@hp.com>
> 
> This patch refactors the !pat_enabled code paths and integrates
> them into the PAT abstraction code.  The PAT table is emulated by
> corresponding to the two cache attribute bits, PWT (Write Through)
> and PCD (Cache Disable).  The emulated PAT table is the same as the
> BIOS default setup when the system has PAT but the "nopat" boot
> option is specified.  The emulated PAT table is also used when
> MSR_IA32_CR_PAT returns 0 -- 9d34cfdf4796 ("x86: Don't rely on
> VMWare emulating PAT MSR correctly").

To be honest, I wasn't surprised when you sent me the same patch and
ignored most of my comments. For the future, please let me know if I'm
wasting my time with commenting on your stuff so that I can plan my work
and not waste time and energy reviewing, ok?

Unfortunately, if you want something done right, you have to do it
yourself.

So I did that, I split that ugly cleanup into something much more
readable, patches as a reply to this message.

Feel free to base your work ontop of

git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git#tip-mm-2
Toshi Kani June 1, 2015, 3:50 p.m. UTC | #2
On Sun, 2015-05-31 at 11:46 +0200, Borislav Petkov wrote:
> On Fri, May 29, 2015 at 04:59:00PM -0600, Toshi Kani wrote:
> > From: Toshi Kani <toshi.kani@hp.com>
> > 
> > This patch refactors the !pat_enabled code paths and integrates
> > them into the PAT abstraction code.  The PAT table is emulated by
> > corresponding to the two cache attribute bits, PWT (Write Through)
> > and PCD (Cache Disable).  The emulated PAT table is the same as the
> > BIOS default setup when the system has PAT but the "nopat" boot
> > option is specified.  The emulated PAT table is also used when
> > MSR_IA32_CR_PAT returns 0 -- 9d34cfdf4796 ("x86: Don't rely on
> > VMWare emulating PAT MSR correctly").
> 
> To be honest, I wasn't surprised when you sent me the same patch and
> ignored most of my comments. For the future, please let me know if I'm
> wasting my time with commenting on your stuff so that I can plan my work
> and not waste time and energy reviewing, ok?

I apologize that I overlooked a comment requesting to divide this
refactor patch into smaller patches.

> Unfortunately, if you want something done right, you have to do it
> yourself.
>
> So I did that, I split that ugly cleanup into something much more
> readable, patches as a reply to this message.
> 
> Feel free to base your work ontop of
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git#tip-mm-2
> 

I will look into your changes, and rebase the patchset.

Thanks,
-Toshi
diff mbox

Patch

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d55318..8533b46 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -40,7 +40,7 @@ 
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
 	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
-	[_PAGE_CACHE_MODE_WC      ]	= _PAGE_PWT | 0        ,
+	[_PAGE_CACHE_MODE_WC      ]	= 0         | _PAGE_PCD,
 	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
 	[_PAGE_CACHE_MODE_UC      ]	= _PAGE_PWT | _PAGE_PCD,
 	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
@@ -50,11 +50,11 @@  EXPORT_SYMBOL(__cachemode2pte_tbl);
 
 uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
 	[__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+	[__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 37d2ba2..9c0ff04 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -78,13 +78,13 @@  void __iomem *
 iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 {
 	/*
-	 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
-	 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
-	 * MTRR is UC or WC.  UC_MINUS gets the real intention, of the
-	 * user, which is "WC if the MTRR is WC, UC if you can't do that."
+	 * For non-PAT systems, translate non-WB request to UC- just in
+	 * case the caller set the PWT bit to prot directly without using
+	 * pgprot_writecombine(). UC- translates to uncached if the MTRR
+	 * is UC or WC. UC- gets the real intention, of the user, which is
+	 * "WC if the MTRR is WC, UC if you can't do that."
 	 */
-	if (!pat_enabled() && pgprot_val(prot) ==
-	    (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC)))
+	if (!pat_enabled() && pgprot2cachemode(prot) != _PAGE_CACHE_MODE_WB)
 		prot = __pgprot(__PAGE_KERNEL |
 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 938609e..078c270 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -292,11 +292,8 @@  EXPORT_SYMBOL_GPL(ioremap_uc);
  */
 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
 {
-	if (pat_enabled())
-		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
+	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
 					__builtin_return_address(0));
-	else
-		return ioremap_nocache(phys_addr, size);
 }
 EXPORT_SYMBOL(ioremap_wc);
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 70d221f..94aae76 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1571,9 +1571,6 @@  int set_memory_wc(unsigned long addr, int numpages)
 {
 	int ret;
 
-	if (!pat_enabled())
-		return set_memory_uc(addr, numpages);
-
 	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
 		_PAGE_CACHE_MODE_WC, NULL);
 	if (ret)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e1ec6a7..819ae28 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -182,7 +182,11 @@  void pat_init_cache_modes(void)
 	char pat_msg[33];
 	u64 pat;
 
-	rdmsrl(MSR_IA32_CR_PAT, pat);
+	if (pat_enabled())
+		rdmsrl(MSR_IA32_CR_PAT, pat);
+	else
+		pat = boot_pat_state;
+
 	pat_msg[32] = 0;
 	for (i = 7; i >= 0; i--) {
 		cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@@ -199,21 +203,16 @@  void pat_init(void)
 	u64 pat;
 	static bool boot_cpu_done;
 
-	if (!pat_enabled())
-		return;
-
 	if (!boot_cpu_done) {
-		if (!cpu_has_pat) {
+		if (!cpu_has_pat)
 			pat_disable("PAT not supported by CPU.");
-			return;
-		}
 
-		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
-		if (!boot_pat_state) {
-			pat_disable("PAT read returns always zero, disabled.");
-			return;
+		if (pat_enabled()) {
+			rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+			if (!boot_pat_state)
+				pat_disable("PAT read returns always zero, disabled.");
 		}
-	} else if (!cpu_has_pat) {
+	} else if (!cpu_has_pat && pat_enabled()) {
 		/*
 		 * If this happens we are on a secondary CPU, but
 		 * switched to PAT on the boot CPU. We have no way to
@@ -222,23 +221,50 @@  void pat_init(void)
 		panic("PAT enabled, but not supported by secondary CPU\n");
 	}
 
-	/* Set PWT to Write-Combining. All other bits stay the same */
-	/*
-	 * PTE encoding used in Linux:
-	 *      PAT
-	 *      |PCD
-	 *      ||PWT
-	 *      |||
-	 *      000 WB		_PAGE_CACHE_WB
-	 *      001 WC		_PAGE_CACHE_WC
-	 *      010 UC-		_PAGE_CACHE_UC_MINUS
-	 *      011 UC		_PAGE_CACHE_UC
-	 * PAT bit unused
-	 */
-	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
-	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+	if (!pat_enabled()) {
+		/*
+		 * No PAT. Emulate the PAT table that corresponds to the two
+		 * cache bits, PWT (Write Through) and PCD (Cache Disable).
+		 * This setup is the same as the BIOS default setup when the
+		 * system has PAT but the "nopat" boot option is specified.
+		 * This emulated PAT table is also used when MSR_IA32_CR_PAT
+		 * returns 0.
+		 *
+		 *  PTE encoding used in Linux:
+		 *       PCD
+		 *       |PWT  PAT
+		 *       ||    slot
+		 *       00    0    WB : _PAGE_CACHE_MODE_WB
+		 *       01    1    WT : _PAGE_CACHE_MODE_WT
+		 *       10    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
+		 *       11    3    UC : _PAGE_CACHE_MODE_UC
+		 *
+		 * NOTE: When WC or WP is used, it is redirected to UC- per
+		 * the default setup in __cachemode2pte_tbl[].
+		 */
+		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+		if (!boot_pat_state)
+			boot_pat_state = pat;
+	} else {
+		/*
+		 * PTE encoding used in Linux:
+		 *      PAT
+		 *      |PCD
+		 *      ||PWT
+		 *      |||
+		 *      000 WB		_PAGE_CACHE_WB
+		 *      001 WC		_PAGE_CACHE_WC
+		 *      010 UC-		_PAGE_CACHE_UC_MINUS
+		 *      011 UC		_PAGE_CACHE_UC
+		 * PAT bit unused
+		 */
+		pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+		      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+	}
 
-	wrmsrl(MSR_IA32_CR_PAT, pat);
+	if (pat_enabled())
+		wrmsrl(MSR_IA32_CR_PAT, pat);
 
 	if (!boot_cpu_done) {
 		pat_init_cache_modes();
@@ -400,12 +426,8 @@  int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
 
 	if (!pat_enabled()) {
 		/* This is identical to page table setting without PAT */
-		if (new_type) {
-			if (req_type == _PAGE_CACHE_MODE_WC)
-				*new_type = _PAGE_CACHE_MODE_UC_MINUS;
-			else
-				*new_type = req_type;
-		}
+		if (new_type)
+			*new_type = req_type;
 		return 0;
 	}
 
@@ -909,11 +931,8 @@  void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
 {
-	if (pat_enabled())
-		return __pgprot(pgprot_val(prot) |
+	return __pgprot(pgprot_val(prot) |
 				cachemode2protval(_PAGE_CACHE_MODE_WC));
-	else
-		return pgprot_noncached(prot);
 }
 EXPORT_SYMBOL_GPL(pgprot_writecombine);