
[v2,16/28] s390x/tcg: Fault-safe memmove

Message ID 20190906075750.14791-17-david@redhat.com (mailing list archive)
State New, archived
Series s390x/tcg: mem_helper: Fault-safe handling

Commit Message

David Hildenbrand Sept. 6, 2019, 7:57 a.m. UTC
Replace the fast_memmove() variants with access_memmove() variants that
first probe access to all affected pages (at most two pages).

In MVCOS, simply always call access_memmove_as() and drop the TODO
about LAP. LAP is already handled in the MMU.

Get rid of adj_len_to_page(), which is now unused.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/mem_helper.c | 204 +++++++++++++++++++++-----------------
 1 file changed, 115 insertions(+), 89 deletions(-)
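
The probe-first approach leans on the fact that a single operand of at most
4096 bytes spans at most two pages, and that the split point falls out of
simple mask arithmetic. A standalone sketch of that arithmetic, for
illustration only (PAGE_SIZE and PAGE_MASK stand in for QEMU's
TARGET_PAGE_SIZE and TARGET_PAGE_MASK):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096ULL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t vaddr = 0x1ffa;   /* 6 bytes remain in the first page */
        uint64_t size = 16;

        /* -(vaddr | PAGE_MASK) is the number of bytes left until the next
         * page boundary; the same expression appears in access_prepare_idx(). */
        uint64_t left = -(vaddr | PAGE_MASK);
        uint64_t size1 = size < left ? size : left;
        uint64_t size2 = size - size1;                 /* spills to page two */
        uint64_t vaddr2 = (vaddr + size1) & PAGE_MASK; /* start of page two */

        /* prints: size1=6 size2=10 vaddr2=0x2000 */
        printf("size1=%llu size2=%llu vaddr2=0x%llx\n",
               (unsigned long long)size1, (unsigned long long)size2,
               (unsigned long long)vaddr2);
        return 0;
    }

Probing both (vaddr, size1) and (vaddr2, size2) before any byte is written
is what makes the operation fault-safe: it either runs to completion or
faults before the first partial store.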

Comments

Richard Henderson Sept. 11, 2019, 9:11 p.m. UTC | #1
On 9/6/19 3:57 AM, David Hildenbrand wrote:
> Replace the fast_memmove() variants with access_memmove() variants that
> first probe access to all affected pages (at most two pages).
> 
> In MVCOS, simply always call access_memmove_as() and drop the TODO
> about LAP. LAP is already handled in the MMU.
> 
> Get rid of adj_len_to_page(), which is now unused.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  target/s390x/mem_helper.c | 204 +++++++++++++++++++++-----------------
>  1 file changed, 115 insertions(+), 89 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
Richard Henderson Sept. 11, 2019, 10:03 p.m. UTC | #2
On 9/6/19 3:57 AM, David Hildenbrand wrote:
> +static void access_memmove_idx(CPUS390XState *env, vaddr dest, vaddr src,
> +                               int size, int dest_idx, int src_idx,
> +                               uintptr_t ra)
> +{
> +    S390Access srca = access_prepare_idx(env, src, size, MMU_DATA_LOAD, src_idx,
> +                                         ra);
> +    S390Access desta = access_prepare_idx(env, dest, size, MMU_DATA_STORE,
> +                                          dest_idx, ra);

I was just thinking that it might be worth passing in these Access structures.
It seems that we usually wind up computing them in several locations.

Hoisting it up would make MVC look like

    midx = cpu_mmu_index(env);
    srca = access_prepare_idx(env, src, size, LOAD, midx, ra);
    dsta = access_prepare_idx(env, dst, size, STORE, midx, ra);

    if (dst == src + 1) {
        uint8_t x = access_get_byte(env, &srca, 0, ra);
        access_memset(env, &dsta, x, size, ra);
    } else if (!is_destructive_overlap(env, dst, src, size)) {
        access_memmove(env, &dsta, &srca, size, ra);
    } else {
        // byte by byte loop, but still need srca, dsta.
    }

which seems even More Correct, since the current access_memset path does not
check for read watchpoints or faults on all of [src, src+size-1].


r~
David Hildenbrand Sept. 13, 2019, 12:37 p.m. UTC | #3
On 12.09.19 00:03, Richard Henderson wrote:
> On 9/6/19 3:57 AM, David Hildenbrand wrote:
>> +static void access_memmove_idx(CPUS390XState *env, vaddr dest, vaddr src,
>> +                               int size, int dest_idx, int src_idx,
>> +                               uintptr_t ra)
>> +{
>> +    S390Access srca = access_prepare_idx(env, src, size, MMU_DATA_LOAD, src_idx,
>> +                                         ra);
>> +    S390Access desta = access_prepare_idx(env, dest, size, MMU_DATA_STORE,
>> +                                          dest_idx, ra);
> 
> I was just thinking that it might be worth passing in these Access structures.
> It seems that we usually wind up computing them in several locations.
> 
> Hoisting it up would make MVC look like
> 
>     midx = cpu_mmu_index(env);
>     srca = access_prepare_idx(env, src, size, LOAD, midx, ra);
>     dsta = access_prepare_idx(env, dst, size, STORE, midx, ra);
> 
>     if (dst == src + 1) {
>         uint8_t x = access_get_byte(env, &srca, 0, ra);
>         access_memset(env, &dsta, x, size, ra);
>     } else if (!is_destructive_overlap(env, dst, src, size)) {
>         access_memmove(env, &dsta, &srca, size, ra);
>     } else {
>         // byte by byte loop, but still need srca, dsta.
>     }
> 
> which seems even More Correct, since the current access_memset path does not
> check for read watchpoints or faults on all of [src, src+size-1].
> 

I had precisely that in previous versions :) Can switch to that model.
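
For concreteness, here is one possible rendering of that hoisted model as a
full helper, using the names this patch introduces. Note the assumption that
access_memset() and access_memmove() have been refactored to take S390Access
pointers; the versions in this patch still take virtual addresses:

    /* Sketch only: assumes access_memset()/access_memmove() take S390Access
     * pointers, which the versions in this patch do not yet do. */
    static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                                  uint64_t src, uintptr_t ra)
    {
        const int mmu_idx = cpu_mmu_index(env, false);
        S390Access srca, desta;
        uint32_t i;

        /* MVC copies one byte more than the length code specifies. */
        l++;

        /* Probe both operands up front: any fault is raised here, before
         * the first byte of the destination is modified. */
        srca = access_prepare_idx(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare_idx(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

        if (dest == src + 1) {
            /* dest == src + 1 propagates a single byte through the range. */
            access_memset(env, &desta, access_get_byte(env, &srca, 0, ra),
                          l, ra);
        } else if (!is_destructive_overlap(env, dest, src, l)) {
            access_memmove(env, &desta, &srca, l, ra);
        } else {
            /* Destructive overlap: byte by byte, still via srca/desta. */
            for (i = 0; i < l; i++) {
                uint8_t byte = access_get_byte(env, &srca, i, ra);
                access_set_byte(env, &desta, i, byte, ra);
            }
        }
        return env->cc_op;  /* as in the existing helper */
    }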

Patch

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 8d654b24e7..db678ddf47 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -65,17 +65,6 @@  static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
     return dest > src && dest <= src + len - 1;
 }
 
-/* Reduce the length so that addr + len doesn't cross a page boundary.  */
-static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
-{
-#ifndef CONFIG_USER_ONLY
-    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
-        return -(addr | TARGET_PAGE_MASK);
-    }
-#endif
-    return len;
-}
-
 /* Trigger a SPECIFICATION exception if an address or a length is not
    naturally aligned.  */
 static inline void check_alignment(CPUS390XState *env, uint64_t v,
@@ -129,6 +118,7 @@  typedef struct S390Access {
     char *haddr2;
     uint16_t size1;
     uint16_t size2;
+    int mmu_idx;
 } S390Access;
 
 static S390Access access_prepare_idx(CPUS390XState *env, vaddr vaddr, int size,
@@ -138,6 +128,7 @@  static S390Access access_prepare_idx(CPUS390XState *env, vaddr vaddr, int size,
     S390Access access = {
         .vaddr1 = vaddr,
         .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
+        .mmu_idx = mmu_idx,
     };
 
     g_assert(size > 0 && size <= 4096);
@@ -195,42 +186,112 @@  static void access_memset(CPUS390XState *env, vaddr vaddr, uint8_t byte,
     access_memset_idx(env, vaddr, byte, size, mmu_idx, ra);
 }
 
-#ifndef CONFIG_USER_ONLY
-static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
-                             uint32_t len, int dest_idx, int src_idx,
-                             uintptr_t ra)
-{
-    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
-    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
-    uint32_t len_adj;
-    void *src_p;
-    void *dest_p;
-    uint8_t x;
-
-    while (len > 0) {
-        src = wrap_address(env, src);
-        dest = wrap_address(env, dest);
-        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
-        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);
-
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
-            memmove(dest_p, src_p, len_adj);
+static uint8_t access_get_byte(CPUS390XState *env, const S390Access *access,
+                               int offset, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    if (offset < access->size1) {
+        return ldub_p(access->haddr1 + offset);
+    } else {
+        return ldub_p(access->haddr2 + offset - access->size1);
+    }
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
+
+    if (offset < access->size1) {
+        if (likely(access->haddr1)) {
+            return ldub_p(access->haddr1 + offset);
+        }
+        return helper_ret_ldub_mmu(env, access->vaddr1 + offset, oi, ra);
+    }
+    if (access->haddr2) {
+        return ldub_p(access->haddr2 + offset - access->size1);
+    }
+    return helper_ret_ldub_mmu(env, access->vaddr2 + offset - access->size1, oi,
+                               ra);
+#endif
+}
+
+static void access_set_byte(CPUS390XState *env, const S390Access *access,
+                            int offset, uint8_t byte, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    if (offset < access->size1) {
+        stb_p(access->haddr1 + offset, byte);
+    } else {
+        stb_p(access->haddr2 + offset - access->size1, byte);
+    }
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
+
+    if (offset < access->size1) {
+        if (likely(access->haddr1)) {
+            stb_p(access->haddr1 + offset, byte);
         } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            len_adj = 1;
-            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
-            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
+            helper_ret_stb_mmu(env, access->vaddr1 + offset, byte, oi, ra);
+        }
+    } else {
+        if (likely(access->haddr2)) {
+            stb_p(access->haddr2 + offset - access->size1, byte);
+        } else {
+            helper_ret_stb_mmu(env, access->vaddr2 + offset - access->size1,
+                               byte, oi, ra);
+        }
+    }
+#endif
+}
+
+/*
+ * Move data with the same semantics as memmove() in case ranges don't overlap
+ * or src > dest. Undefined behavior on destructive overlaps.
+ */
+static void access_memmove_idx(CPUS390XState *env, vaddr dest, vaddr src,
+                               int size, int dest_idx, int src_idx,
+                               uintptr_t ra)
+{
+    S390Access srca = access_prepare_idx(env, src, size, MMU_DATA_LOAD, src_idx,
+                                         ra);
+    S390Access desta = access_prepare_idx(env, dest, size, MMU_DATA_STORE,
+                                          dest_idx, ra);
+    uint16_t diff;
+
+    /* Fallback to slow access in case we don't have access to all host pages */
+    if (unlikely(!desta.haddr1 || (desta.size2 && !desta.haddr2) ||
+                 !srca.haddr1 || (srca.size2 && !srca.haddr2))) {
+        uint8_t byte;
+        int i;
+
+        for (i = 0; i < desta.size1 + desta.size2; i++) {
+            byte = access_get_byte(env, &srca, i, ra);
+            access_set_byte(env, &desta, i, byte, ra);
         }
-        src += len_adj;
-        dest += len_adj;
-        len -= len_adj;
+        return;
+    }
+
+    if (srca.size1 == desta.size1) {
+        memmove(desta.haddr1, srca.haddr1, srca.size1);
+        memmove(desta.haddr2, srca.haddr2, srca.size2);
+    } else if (srca.size1 < desta.size1) {
+        diff = desta.size1 - srca.size1;
+        memmove(desta.haddr1, srca.haddr1, srca.size1);
+        memmove(desta.haddr1 + srca.size1, srca.haddr2, diff);
+        memmove(desta.haddr2, srca.haddr2 + diff, desta.size2);
+    } else {
+        diff = srca.size1 - desta.size1;
+        memmove(desta.haddr1, srca.haddr1, desta.size1);
+        memmove(desta.haddr2, srca.haddr1 + desta.size1, diff);
+        memmove(desta.haddr2 + diff, srca.haddr2, srca.size2);
     }
 }
 
+static void access_memmove(CPUS390XState *env, vaddr dest, vaddr src,
+                           int size, uintptr_t ra)
+{
+    int mmu_idx = cpu_mmu_index(env, false);
+
+    access_memmove_idx(env, dest, src, size, mmu_idx, mmu_idx, ra);
+}
+
 static int mmu_idx_from_as(uint8_t as)
 {
     switch (as) {
@@ -246,43 +307,14 @@  static int mmu_idx_from_as(uint8_t as)
     }
 }
 
-static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
-                            uint32_t len, uint8_t dest_as, uint8_t src_as,
-                            uintptr_t ra)
+static void access_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
+                              uint32_t len, uint8_t dest_as, uint8_t src_as,
+                              uintptr_t ra)
 {
     int src_idx = mmu_idx_from_as(src_as);
     int dest_idx = mmu_idx_from_as(dest_as);
 
-    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
-}
-#endif
-
-static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
-                         uint32_t l, uintptr_t ra)
-{
-    int mmu_idx = cpu_mmu_index(env, false);
-
-    while (l > 0) {
-        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
-        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            uint32_t l_adj = adj_len_to_page(l, src);
-            l_adj = adj_len_to_page(l_adj, dest);
-            memmove(dest_p, src_p, l_adj);
-            src += l_adj;
-            dest += l_adj;
-            l -= l_adj;
-        } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
-            src++;
-            dest++;
-            l--;
-        }
-    }
+    access_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
 }
 
 /* and on array */
@@ -386,7 +418,7 @@  static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
     if (dest == src + 1) {
         access_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra);
     } else if (!is_destructive_overlap(env, dest, src, l)) {
-        fast_memmove(env, dest, src, l, ra);
+        access_memmove(env, dest, src, l, ra);
     } else {
         for (i = 0; i < l; i++) {
             uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
@@ -744,7 +776,7 @@  uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
      * - CC-option with suppression of page-translation exceptions
      * - Store r1/r2 register identifiers at real location 162
      */
-    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
+    access_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
     return 0; /* data moved */
 }
 
@@ -847,7 +879,7 @@  static inline uint32_t do_mvcl(CPUS390XState *env,
         len = MIN(len, *srclen);
         *destlen -= len;
         *srclen -= len;
-        fast_memmove(env, *dest, *src, len, ra);
+        access_memmove(env, *dest, *src, len, ra);
         *src = wrap_address(env, *src + len);
         *dest = wrap_address(env, *dest + len);
     } else if (wordsize == 1) {
@@ -911,7 +943,7 @@  uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
         } else {
             cur_len = MIN(cur_len, srclen);
 
-            fast_memmove(env, dest, src, cur_len, ra);
+            access_memmove(env, dest, src, cur_len, ra);
             src = wrap_address(env, src + cur_len);
             srclen -= cur_len;
             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
@@ -2453,16 +2485,10 @@  uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
         s390_program_interrupt(env, PGM_ADDRESSING, 6, ra);
     }
 
-    /* FIXME: a) LAP
-     *        b) Access using correct keys
-     *        c) AR-mode
-     */
-#ifdef CONFIG_USER_ONLY
-    /* psw keys are never valid in user mode, we will never reach this */
-    g_assert_not_reached();
-#else
-    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
-#endif
+    /* FIXME: Access using correct keys and AR-mode */
+    if (len) {
+        access_memmove_as(env, dest, src, len, dest_as, src_as, ra);
+    }
 
     return cc;
 }
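
As a closing illustration of the three-way split in access_memmove_idx()
above: when source and destination cross the page boundary at different
offsets, up to three memmove() calls stitch the fragments together. A
standalone sketch with flat buffers standing in for the per-page host
pointers (the Frag type and the test values are made up for the example):

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        char *haddr1, *haddr2;  /* host pointers for the two fragments */
        unsigned size1, size2;  /* fragment lengths; size1 + size2 == size */
    } Frag;

    static void frag_memmove(Frag *d, const Frag *s, unsigned size)
    {
        assert(d->size1 + d->size2 == size && s->size1 + s->size2 == size);
        if (s->size1 == d->size1) {
            memmove(d->haddr1, s->haddr1, s->size1);
            memmove(d->haddr2, s->haddr2, s->size2);
        } else if (s->size1 < d->size1) {
            /* Source spills to its second page first. */
            unsigned diff = d->size1 - s->size1;
            memmove(d->haddr1, s->haddr1, s->size1);
            memmove(d->haddr1 + s->size1, s->haddr2, diff);
            memmove(d->haddr2, s->haddr2 + diff, d->size2);
        } else {
            /* Destination spills to its second page first. */
            unsigned diff = s->size1 - d->size1;
            memmove(d->haddr1, s->haddr1, d->size1);
            memmove(d->haddr2, s->haddr1 + d->size1, diff);
            memmove(d->haddr2 + diff, s->haddr2, s->size2);
        }
    }

    int main(void)
    {
        char src[16] = "0123456789abcdef", dst[16] = { 0 };
        /* Source splits 6/10, destination splits 10/6. */
        Frag s = { src, src + 6, 6, 10 };
        Frag d = { dst, dst + 10, 10, 6 };

        frag_memmove(&d, &s, 16);
        printf("%.16s\n", dst);  /* prints 0123456789abcdef */
        return 0;
    }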