
[06/12] target/ppc: Fix ordering of hash MMU accesses

Message ID 20190215170029.15641-7-clg@kaod.org (mailing list archive)
State New, archived
Series ppc: add native hash and radix support for POWER9

Commit Message

Cédric Le Goater Feb. 15, 2019, 5 p.m. UTC
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>

With mttcg, MMU lookups can happen at the same time as the
guest is modifying the page tables.

Since the HPTEs of the hash table MMU contain two words (or
doublewords on 64-bit), we need to make sure we read them
in the right order, with the correct memory barrier.

Additionally, when using emulated SPAPR mode, the hypercalls
writing to the hash table must also perform the updates in
the right order.

Note: This part is still not entirely correct

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/ppc/spapr.c          | 21 +++++++++++++++++++--
 target/ppc/mmu-hash32.c |  6 ++++++
 target/ppc/mmu-hash64.c |  6 ++++++
 3 files changed, 31 insertions(+), 2 deletions(-)
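
The ordering the patch relies on can be sketched outside QEMU with plain
C11 atomics. hpte_set()/hpte_lookup(), the single global HPTE and the
HPTE_V_VALID value below are illustrative stand-ins rather than QEMU code,
and the fences take the place of smp_wmb()/smp_rmb():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define HPTE_V_VALID 0x1ULL        /* placeholder for the valid bit in word 0 */

static _Atomic uint64_t hpte0;     /* word 0: valid bit, AVPN, ... */
static _Atomic uint64_t hpte1;     /* word 1: real page number, permissions, ... */

/*
 * Writer, mirroring the "valid" branch of spapr_store_hpte(): publish
 * word 1 before the word that carries the valid bit.
 */
static void hpte_set(uint64_t pte0, uint64_t pte1)
{
    atomic_store_explicit(&hpte1, pte1, memory_order_relaxed);
    atomic_thread_fence(memory_order_release);    /* stands in for smp_wmb() */
    atomic_store_explicit(&hpte0, pte0, memory_order_relaxed);
}

/*
 * Reader, mirroring ppc_hash64_pteg_search(): read word 0 (and its valid
 * bit) before word 1.
 */
static bool hpte_lookup(uint64_t *pte0_out, uint64_t *pte1_out)
{
    uint64_t v = atomic_load_explicit(&hpte0, memory_order_relaxed);

    atomic_thread_fence(memory_order_acquire);    /* stands in for smp_rmb() */
    if (!(v & HPTE_V_VALID)) {
        return false;                             /* empty or partially torn-down slot */
    }
    *pte0_out = v;
    *pte1_out = atomic_load_explicit(&hpte1, memory_order_relaxed);
    return true;
}

A reader that observes the new valid bit in word 0 is then guaranteed to
also observe the word 1 that was stored before it. The invalidation branch
of the patch inverts the store order (word 0 first, then word 1) for the
same reason: a reader must never see the valid bit paired with a
half-updated entry.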

Comments

David Gibson Feb. 19, 2019, 3:52 a.m. UTC | #1
On Fri, Feb 15, 2019 at 06:00:23PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> With mttcg, MMU lookups can happen at the same time as the
> guest is modifying the page tables.
> 
> Since the HPTEs of the hash table MMU contain two words (or
> doublewords on 64-bit), we need to make sure we read them
> in the right order, with the correct memory barrier.
> 
> Additionally, when using emulated SPAPR mode, the hypercalls
> writing to the hash table must also perform the updates in
> the right order.
> 
> Note: This part is still not entirely correct
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Yeah, this stuff was written under the assumption it was protected by
the BQL, which is getting less true all the time.

Applied.
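
The interleaving that the BQL used to exclude, and that mttcg now allows,
is roughly the following; the vCPU labels are illustrative and not part of
the patch:

/*
 * With the pre-patch store order in spapr_store_hpte() and no barriers:
 *
 *   vCPU A: H_ENTER hypercall           vCPU B: hash MMU lookup (mttcg)
 *   --------------------------          -------------------------------
 *   store pte0 (new valid bit)
 *                                       load pte0  -> sees the new valid bit
 *                                       load pte1  -> may still be the old value
 *   store pte1
 *
 * vCPU B then builds a translation from the new word 0 and a stale word 1.
 * The smp_wmb()/smp_rmb() pairs added by the patch close this window.
 */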

> ---
>  hw/ppc/spapr.c          | 21 +++++++++++++++++++--
>  target/ppc/mmu-hash32.c |  6 ++++++
>  target/ppc/mmu-hash64.c |  6 ++++++
>  3 files changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 60572eb59275..1afe31ee6163 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1507,8 +1507,25 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
>      if (!spapr->htab) {
>          kvmppc_write_hpte(ptex, pte0, pte1);
>      } else {
> -        stq_p(spapr->htab + offset, pte0);
> -        stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
> +        if (pte0 & HPTE64_V_VALID) {
> +            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
> +            /*
> +             * When setting valid, we write PTE1 first. This ensures
> +             * proper synchronization with the reading code in
> +             * ppc_hash64_pteg_search()
> +             */
> +            smp_wmb();
> +            stq_p(spapr->htab + offset, pte0);
> +        } else {
> +            stq_p(spapr->htab + offset, pte0);
> +            /*
> +             * When clearing it we set PTE0 first. This ensures proper
> +             * synchronization with the reading code in
> +             * ppc_hash64_pteg_search()
> +             */
> +            smp_wmb();
> +            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
> +        }
>      }
>  }
>  
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 03ae3c127985..e8562a7c8780 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -319,6 +319,12 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
>  
>      for (i = 0; i < HPTES_PER_GROUP; i++) {
>          pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
> +        /*
> +         * pte0 contains the valid bit and must be read before pte1,
> +         * otherwise we might see an old pte1 with a new valid bit and
> +         * thus an inconsistent hpte value
> +         */
> +        smp_rmb();
>          pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
>  
>          if ((pte0 & HPTE32_V_VALID)
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index f6c822ef917b..b3c4d33faa55 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -506,6 +506,12 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
>      }
>      for (i = 0; i < HPTES_PER_GROUP; i++) {
>          pte0 = ppc_hash64_hpte0(cpu, pteg, i);
> +        /*
> +         * pte0 contains the valid bit and must be read before pte1,
> +         * otherwise we might see an old pte1 with a new valid bit and
> +         * thus an inconsistent hpte value
> +         */
> +        smp_rmb();
>          pte1 = ppc_hash64_hpte1(cpu, pteg, i);
>  
>          /* This compares V, B, H (secondary) and the AVPN */

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 60572eb59275..1afe31ee6163 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1507,8 +1507,25 @@  static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
     if (!spapr->htab) {
         kvmppc_write_hpte(ptex, pte0, pte1);
     } else {
-        stq_p(spapr->htab + offset, pte0);
-        stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+        if (pte0 & HPTE64_V_VALID) {
+            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+            /*
+             * When setting valid, we write PTE1 first. This ensures
+             * proper synchronization with the reading code in
+             * ppc_hash64_pteg_search()
+             */
+            smp_wmb();
+            stq_p(spapr->htab + offset, pte0);
+        } else {
+            stq_p(spapr->htab + offset, pte0);
+            /*
+             * When clearing it we set PTE0 first. This ensures proper
+             * synchronization with the reading code in
+             * ppc_hash64_pteg_search()
+             */
+            smp_wmb();
+            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+        }
     }
 }
 
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 03ae3c127985..e8562a7c8780 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -319,6 +319,12 @@  static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
 
     for (i = 0; i < HPTES_PER_GROUP; i++) {
         pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
+        /*
+         * pte0 contains the valid bit and must be read before pte1,
+         * otherwise we might see an old pte1 with a new valid bit and
+         * thus an inconsistent hpte value
+         */
+        smp_rmb();
         pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
 
         if ((pte0 & HPTE32_V_VALID)
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index f6c822ef917b..b3c4d33faa55 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -506,6 +506,12 @@  static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
     }
     for (i = 0; i < HPTES_PER_GROUP; i++) {
         pte0 = ppc_hash64_hpte0(cpu, pteg, i);
+        /*
+         * pte0 contains the valid bit and must be read before pte1,
+         * otherwise we might see an old pte1 with a new valid bit and
+         * thus an inconsistent hpte value
+         */
+        smp_rmb();
         pte1 = ppc_hash64_hpte1(cpu, pteg, i);
 
         /* This compares V, B, H (secondary) and the AVPN */