diff mbox series

[v4] tcg: Toggle page execution for Apple Silicon

Message ID 20210121184752.1395873-1-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series [v4] tcg: Toggle page execution for Apple Silicon | expand

Commit Message

Richard Henderson Jan. 21, 2021, 6:47 p.m. UTC
From: Roman Bolshakov <r.bolshakov@yadro.com>

Pages can't be both write and executable at the same time on Apple
Silicon. macOS provides public API to switch write protection [1] for
JIT applications, like TCG.

1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
[rth: Inline the qemu_thread_jit_* functions;
 drop the MAP_JIT change for a follow-on patch.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Supersedes: <20210113032806.18220-1-r.bolshakov@yadro.com>

This is the version of Roman's patch that I'm queuing to tcg-next.
What's missing from the full "Fix execution" patch is setting MAP_JIT
for !splitwx in alloc_code_gen_buffer().


r~

---
 include/qemu/osdep.h      | 28 ++++++++++++++++++++++++++++
 accel/tcg/cpu-exec.c      |  2 ++
 accel/tcg/translate-all.c |  3 +++
 tcg/tcg.c                 |  1 +
 4 files changed, 34 insertions(+)

Comments

Alexander Graf Jan. 21, 2021, 8:01 p.m. UTC | #1
On 21.01.21 19:47, Richard Henderson wrote:
> From: Roman Bolshakov <r.bolshakov@yadro.com>
>
> Pages can't be both write and executable at the same time on Apple
> Silicon. macOS provides public API to switch write protection [1] for
> JIT applications, like TCG.
>
> 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
>
> Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
> Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
> [rth: Inline the qemu_thread_jit_* functions;
>  drop the MAP_JIT change for a follow-on patch.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>
> Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
>
> This is the version of Roman's patch that I'm queuing to tcg-next.
> What's missing from the full "Fix execution" patch is setting MAP_JIT
> for !splitwx in alloc_code_gen_buffer().


This patch segfaults in tcg_out32() for me if I add the MAP_JIT flag
manually to the mmap call:


(lldb) bt
* thread #3, stop reason = EXC_BAD_ACCESS (code=2, address=0x118000000)
  * frame #0: 0x0000000100297e8c qemu-system-x86_64`tcg_prologue_init
[inlined] tcg_out32(s=0x0000000100bb64c0, v=2847570941) at tcg.c:250:24
[opt]
    frame #1: 0x0000000100297e7c qemu-system-x86_64`tcg_prologue_init
[inlined] tcg_out_insn_3314(s=0x0000000100bb64c0, insn=2847539200,
r1=TCG_REG_X29, r2=TCG_REG_X30, rn=TCG_REG_SP, ofs=-96, pre=true,
w=true) at tcg-target.c.inc:666 [opt]
    frame #2: 0x0000000100297e7c qemu-system-x86_64`tcg_prologue_init
[inlined] tcg_target_qemu_prologue(s=0x0000000100bb64c0) at
tcg-target.c.inc:2858 [opt]
    frame #3: 0x0000000100297e7c
qemu-system-x86_64`tcg_prologue_init(s=0x0000000100bb64c0) at tcg.c:1116
[opt]
    frame #4: 0x00000001002d7ab8
qemu-system-x86_64`tcg_exec_init(tb_size=<unavailable>,
splitwx=<unavailable>) at translate-all.c:1349:5 [opt]
    frame #5: 0x000000010028d690
qemu-system-x86_64`tcg_init(ms=<unavailable>) at tcg-all.c:113:5 [opt]
    frame #6: 0x000000010007d540
qemu-system-x86_64`accel_init_machine(accel=0x00000001020c9ec0,
ms=0x00000001020c6880) at accel.c:55:11 [opt]
    frame #7: 0x00000001002b90f0
qemu-system-x86_64`do_configure_accelerator(opaque=0x000000016ff12ea0,
opts=0x00000001020c9e30, errp=0x0000000100bc18e0) at vl.c:2148:11 [opt]
    frame #8: 0x0000000100482c00
qemu-system-x86_64`qemu_opts_foreach(list=<unavailable>,
func=(qemu-system-x86_64`do_configure_accelerator at vl.c:2125),
opaque=0x000000016ff12ea0, errp=0x0000000100bc18e0) at
qemu-option.c:1147:14 [opt]
    frame #9: 0x00000001002b6d48 qemu-system-x86_64`qemu_init [inlined]
configure_accelerators(progname=<unavailable>) at vl.c:2216:10 [opt]
    frame #10: 0x00000001002b6bd8
qemu-system-x86_64`qemu_init(argc=<unavailable>, argv=<unavailable>,
envp=<unavailable>) at vl.c:3484 [opt]
    frame #11: 0x0000000100007aac qemu-system-x86_64`qemu_main(argc=3,
argv=0x000000016fdff848, envp=<unavailable>) at main.c:49:5 [opt]
    frame #12: 0x000000010001dd34
qemu-system-x86_64`call_qemu_main(opaque=0x0000000000000000) at
cocoa.m:1714:14 [opt]
    frame #13: 0x0000000100477c1c
qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at
qemu-thread-posix.c:521:9 [opt]
    frame #14: 0x000000019846106c libsystem_pthread.dylib`_pthread_start
+ 320


Alex
Alexander Graf Jan. 21, 2021, 8:06 p.m. UTC | #2
On 21.01.21 21:01, Alexander Graf wrote:
> On 21.01.21 19:47, Richard Henderson wrote:
>> From: Roman Bolshakov <r.bolshakov@yadro.com>
>>
>> Pages can't be both write and executable at the same time on Apple
>> Silicon. macOS provides public API to switch write protection [1] for
>> JIT applications, like TCG.
>>
>> 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
>>
>> Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
>> Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
>> [rth: Inline the qemu_thread_jit_* functions;
>>  drop the MAP_JIT change for a follow-on patch.]
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>
>> Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
>>
>> This is the version of Roman's patch that I'm queuing to tcg-next.
>> What's missing from the full "Fix execution" patch is setting MAP_JIT
>> for !splitwx in alloc_code_gen_buffer().
>
> This patch segfaults in tcg_out32() for me if I add the MAP_JIT flag
> manually to the mmap call:


I take it all back. I forgot to actually git am the patch :(. It works
just fine.

Tested-by: Alexander Graf <agraf@csgraf.de>


Alex
Roman Bolshakov Jan. 23, 2021, 11:53 a.m. UTC | #3
On Thu, Jan 21, 2021 at 08:47:52AM -1000, Richard Henderson wrote:
> From: Roman Bolshakov <r.bolshakov@yadro.com>
> 
> Pages can't be both write and executable at the same time on Apple
> Silicon. macOS provides public API to switch write protection [1] for
> JIT applications, like TCG.
> 
> 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
> 
> Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
> Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
> [rth: Inline the qemu_thread_jit_* functions;
>  drop the MAP_JIT change for a follow-on patch.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> 
> Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
> 
> This is the version of Roman's patch that I'm queuing to tcg-next.
> What's missing from the full "Fix execution" patch is setting MAP_JIT
> for !splitwx in alloc_code_gen_buffer().
> 

Richard, thanks for updating the patch. I have no objections to
moving the functions and inlining them. However, I'm seeing an issue that
wasn't present in v3:

Process 37109 stopped
* thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
    frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
   2528 TCGOp *tcg_emit_op(TCGOpcode opc)
   2529 {
   2530     TCGOp *op = tcg_op_alloc(opc);
-> 2531     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
   2532     return op;
   2533 }
   2534
Target 0: (qemu-system-x86_64) stopped.
(lldb) bt
* thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
  * frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
    frame #1: 0x000000010026f040 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3(opc=INDEX_op_add_i64, a1=4430334952, a2=4430333440, a3=4430361496) at tcg-op.c:60:17 [opt]
    frame #2: 0x000000010026f038 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3_i64(opc=INDEX_op_add_i64, a1=<unavailable>, a2=<unavailable>, a3=<unavailable>) at tcg-op.h:94 [opt]
    frame #3: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_add_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.h:618 [opt]
    frame #4: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.c:1235 [opt]
    frame #5: 0x000000010021d1e0 qemu-system-x86_64`gen_lea_modrm_1(s=<unavailable>, a=(def_seg = 2, base = 5, index = -1, scale = 0, disp = -689)) at translate.c:2101:9 [opt]
    frame #6: 0x000000010020eeec qemu-system-x86_64`disas_insn [inlined] gen_lea_modrm(env=0x0000000118610870, s=0x00000001700b6b00, modrm=<unavailable>) at translate.c:2111:15 [opt]
    frame #7: 0x000000010020eec0 qemu-system-x86_64`disas_insn(s=0x00000001700b6b00, cpu=<unavailable>) at translate.c:5509 [opt]
    frame #8: 0x000000010020bb44 qemu-system-x86_64`i386_tr_translate_insn(dcbase=0x00000001700b6b00, cpu=<unavailable>) at translate.c:8573:15 [opt]
    frame #9: 0x00000001002fbcf8 qemu-system-x86_64`translator_loop(ops=0x0000000100b209c8, db=0x00000001700b6b00, cpu=0x0000000118608000, tb=0x0000000120017200, max_insns=512) at translator.c:0 [opt]
    frame #10: 0x000000010020b73c qemu-system-x86_64`gen_intermediate_code(cpu=<unavailable>, tb=<unavailable>, max_insns=<unavailable>) at translate.c:8635:5 [opt]
    frame #11: 0x0000000100257970 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118608000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16777216) at translate-all.c:1931:5 [opt]
    frame #12: 0x00000001002deb90 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118608000, last_tb=0x0000000000000000, tb_exit=<unavailable>, cf_mask=0) at cpu-exec.c:456:14 [opt]
    frame #13: 0x00000001002deb54 qemu-system-x86_64`cpu_exec(cpu=0x0000000118608000) at cpu-exec.c:812 [opt]
    frame #14: 0x00000001002bc0d0 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118608000) at tcg-cpus.c:57:11 [opt]
    frame #15: 0x000000010024c2cc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
    frame #16: 0x00000001004b00b4 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
    frame #17: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320

I'm looking into the issue but perhaps we'll need v5.

Best regards,
Roman

> 
> r~
> 
> ---
>  include/qemu/osdep.h      | 28 ++++++++++++++++++++++++++++
>  accel/tcg/cpu-exec.c      |  2 ++
>  accel/tcg/translate-all.c |  3 +++
>  tcg/tcg.c                 |  1 +
>  4 files changed, 34 insertions(+)
> 
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index a434382c58..b6ffdc15bf 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -119,6 +119,10 @@ extern int daemon(int, int);
>  #include "sysemu/os-posix.h"
>  #endif
>  
> +#ifdef __APPLE__
> +#include <AvailabilityMacros.h>
> +#endif
> +
>  #include "glib-compat.h"
>  #include "qemu/typedefs.h"
>  
> @@ -682,4 +686,28 @@ char *qemu_get_host_name(Error **errp);
>   */
>  size_t qemu_get_host_physmem(void);
>  
> +/*
> + * Toggle write/execute on the pages marked MAP_JIT
> + * for the current thread.
> + */
> +#if defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
> +static inline void qemu_thread_jit_execute(void)
> +{
> +    if (__builtin_available(macOS 11.0, *)) {
> +        pthread_jit_write_protect_np(true);
> +    }
> +}
> +
> +static inline void qemu_thread_jit_write(void)
> +{
> +    if (__builtin_available(macOS 11.0, *)) {
> +        pthread_jit_write_protect_np(false);
> +    }
> +}
> +#else
> +static inline void qemu_thread_jit_write(void) {}
> +static inline void qemu_thread_jit_execute(void) {}
> +#endif
> +
>  #endif
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 37d17c8e88..6d017e46dd 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -186,6 +186,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
>      }
>  #endif /* DEBUG_DISAS */
>  
> +    qemu_thread_jit_execute();
>      ret = tcg_qemu_tb_exec(env, tb_ptr);
>      cpu->can_do_io = 1;
>      /*
> @@ -410,6 +411,7 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
>  {
>      uintptr_t old;
>  
> +    qemu_thread_jit_write();
>      assert(n < ARRAY_SIZE(tb->jmp_list_next));
>      qemu_spin_lock(&tb_next->jmp_lock);
>  
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 73fef47148..d09c187e0f 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1670,7 +1670,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
>  
>  static void tb_phys_invalidate__locked(TranslationBlock *tb)
>  {
> +    qemu_thread_jit_write();
>      do_tb_phys_invalidate(tb, true);
> +    qemu_thread_jit_execute();
>  }
>  
>  /* invalidate one TB
> @@ -1872,6 +1874,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>  #endif
>  
>      assert_memory_lock();
> +    qemu_thread_jit_write();
>  
>      phys_pc = get_page_addr_code(env, pc);
>  
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 5110f6f39c..4d734130df 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1112,6 +1112,7 @@ void tcg_prologue_init(TCGContext *s)
>      s->pool_labels = NULL;
>  #endif
>  
> +    qemu_thread_jit_write();
>      /* Generate the prologue.  */
>      tcg_target_qemu_prologue(s);
>  
> -- 
> 2.25.1
>
Roman Bolshakov Jan. 23, 2021, 6:04 p.m. UTC | #4
On Sat, Jan 23, 2021 at 02:53:49PM +0300, Roman Bolshakov wrote:
> On Thu, Jan 21, 2021 at 08:47:52AM -1000, Richard Henderson wrote:
> > From: Roman Bolshakov <r.bolshakov@yadro.com>
> > 
> > Pages can't be both write and executable at the same time on Apple
> > Silicon. macOS provides public API to switch write protection [1] for
> > JIT applications, like TCG.
> > 
> > 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
> > 
> > Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
> > Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
> > [rth: Inline the qemu_thread_jit_* functions;
> >  drop the MAP_JIT change for a follow-on patch.]
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > ---
> > 
> > Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
> > 
> > This is the version of Roman's patch that I'm queuing to tcg-next.
> > What's missing from the full "Fix execution" patch is setting MAP_JIT
> > for !splitwx in alloc_code_gen_buffer().
> > 
> 
> Richard, thanks for updating the patch. I have no objections against
> moving the functions and inlining them. However I'm seeing an issue that
> wasn't present in v3:
> 
> Process 37109 stopped                                                                                                                          * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
>     frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]                                       2528 TCGOp *tcg_emit_op(TCGOpcode opc)
>    2529 {                                                                                                                                         2530     TCGOp *op = tcg_op_alloc(opc);
> -> 2531     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
>    2532     return op;
>    2533 }
>    2534
> Target 0: (qemu-system-x86_64) stopped.
> (lldb) bt
> * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
>   * frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
>     frame #1: 0x000000010026f040 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3(opc=INDEX_op_add_i64, a1=4430334952, a2=4430333440,
> a3=4430361496) at tcg-op.c:60:17 [opt]
>     frame #2: 0x000000010026f038 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3_i64(opc=INDEX_op_add_i64, a1=<unavailable>, a2=<unav
> ailable>, a3=<unavailable>) at tcg-op.h:94 [opt]
>     frame #3: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_add_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<una
> vailable>) at tcg-op.h:618 [opt]
>     frame #4: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.c:123
> 5 [opt]
>     frame #5: 0x000000010021d1e0 qemu-system-x86_64`gen_lea_modrm_1(s=<unavailable>, a=(def_seg = 2, base = 5, index = -1, scale = 0, disp = -6
> 89)) at translate.c:2101:9 [opt]
>     frame #6: 0x000000010020eeec qemu-system-x86_64`disas_insn [inlined] gen_lea_modrm(env=0x0000000118610870, s=0x00000001700b6b00, modrm=<una
> vailable>) at translate.c:2111:15 [opt]
>     frame #7: 0x000000010020eec0 qemu-system-x86_64`disas_insn(s=0x00000001700b6b00, cpu=<unavailable>) at translate.c:5509 [opt]
>     frame #8: 0x000000010020bb44 qemu-system-x86_64`i386_tr_translate_insn(dcbase=0x00000001700b6b00, cpu=<unavailable>) at translate.c:8573:15
>  [opt]
>     frame #9: 0x00000001002fbcf8 qemu-system-x86_64`translator_loop(ops=0x0000000100b209c8, db=0x00000001700b6b00, cpu=0x0000000118608000, tb=0
> x0000000120017200, max_insns=512) at translator.c:0 [opt]
>     frame #10: 0x000000010020b73c qemu-system-x86_64`gen_intermediate_code(cpu=<unavailable>, tb=<unavailable>, max_insns=<unavailable>) at tra
> nslate.c:8635:5 [opt]
>     frame #11: 0x0000000100257970 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118608000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16
> 777216) at translate-all.c:1931:5 [opt]
>     frame #12: 0x00000001002deb90 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118608000, last_tb=0x0000000000000000, tb_exit=<un
> available>, cf_mask=0) at cpu-exec.c:456:14 [opt]
>     frame #13: 0x00000001002deb54 qemu-system-x86_64`cpu_exec(cpu=0x0000000118608000) at cpu-exec.c:812 [opt]
>     frame #14: 0x00000001002bc0d0 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118608000) at tcg-cpus.c:57:11 [opt]
>     frame #15: 0x000000010024c2cc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
>     frame #16: 0x00000001004b00b4 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
>     frame #17: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320
> 
> I'm looking into the issue but perhaps we'll need v5.
> 

Nope. The issue is not directly related to the patch and W^X. I think it
can be applied.

tcg_ctx->ops is somehow getting corrupted despite being initialized
properly during TCG start:

(lldb) p tcg_ctx->ops
(TCGContext::(anonymous union)) $18 = {
  tqh_first = 0x0000008401010000
  tqh_circ = {
    tql_next = 0x0000008401010000
    tql_prev = 0xfffffffffffffd4f
  }
}

I've bisected it (with v3 of Apple Silicon fix for TCG) to:

commit 8fe35e0444be88de4e3ab80a2a0e210a1f6d663d
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   Mon Mar 30 20:42:43 2020 -0700

    tcg/optimize: Use tcg_constant_internal with constant folding

    Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

 tcg/optimize.c | 108 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 49 insertions(+), 59 deletions(-)

However, it crashes with a slightly different backtrace than the one provided above:

Process 17251 stopped
* thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
    frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
   65
   66   static inline bool ts_is_copy(TCGTemp *ts)
   67   {
-> 68       return ts_info(ts)->next_copy != ts;
   69   }
   70
   71   /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
Target 0: (qemu-system-x86_64) stopped.
(lldb) bt
* thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
  * frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
    frame #1: 0x00000001002727a4 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_are_copies(ts1=0x00000001202e8e50, ts2=0x00000001202ef398) at optimize.c:163 [opt]
    frame #2: 0x000000010027278c qemu-system-x86_64`tcg_opt_gen_mov(s=0x00000001202e8000, op=0x0000000119157710, dst=4834889296, src=4834915224) at optimize.c:191 [opt]
    frame #3: 0x0000000100271740 qemu-system-x86_64`tcg_optimize(s=<unavailable>) at optimize.c:0:9 [opt]
    frame #4: 0x00000001002f39c4 qemu-system-x86_64`tcg_gen_code(s=0x00000001202e8000, tb=0x0000000128020800) at tcg.c:4407:5 [opt]
    frame #5: 0x00000001002b2688 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118428000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16777216) at translate-all.c:1961:21 [opt]
    frame #6: 0x0000000100279460 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118428000, last_tb=0x0000000000000000, tb_exit=<unavailable>, cf_mask=0) at cpu-exec.c:456:14 [opt]
    frame #7: 0x0000000100279424 qemu-system-x86_64`cpu_exec(cpu=0x0000000118428000) at cpu-exec.c:812 [opt]
    frame #8: 0x000000010026ea74 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118428000) at tcg-cpus.c:57:11 [opt]
    frame #9: 0x0000000100284efc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
    frame #10: 0x00000001004cffe8 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
    frame #11: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320

The problem is that ts2 in ts_are_copies() contains NULL state_ptr:

(lldb) p *ts2
(TCGTemp) $2 = {
  reg = TCG_REG_X0
  val_type = TEMP_VAL_DEAD
  base_type = TCG_TYPE_I64
  type = TCG_TYPE_I64
  kind = TEMP_CONST
  indirect_reg = 0
  indirect_base = 0
  mem_coherent = 0
  mem_allocated = 0
  temp_allocated = 1
  val = -690
  mem_base = 0x0000000000000000
  mem_offset = 0
  name = 0x0000000000000000
  state = 0
  state_ptr = 0x0000000000000000
}

-Roman
BALATON Zoltan Jan. 23, 2021, 6:33 p.m. UTC | #5
On Sat, 23 Jan 2021, Roman Bolshakov wrote:
> On Sat, Jan 23, 2021 at 02:53:49PM +0300, Roman Bolshakov wrote:
>> On Thu, Jan 21, 2021 at 08:47:52AM -1000, Richard Henderson wrote:
>>> From: Roman Bolshakov <r.bolshakov@yadro.com>
>>>
>>> Pages can't be both write and executable at the same time on Apple
>>> Silicon. macOS provides public API to switch write protection [1] for
>>> JIT applications, like TCG.
>>>
>>> 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
>>>
>>> Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
>>> Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
>>> [rth: Inline the qemu_thread_jit_* functions;
>>>  drop the MAP_JIT change for a follow-on patch.]
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>
>>> Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
>>>
>>> This is the version of Roman's patch that I'm queuing to tcg-next.
>>> What's missing from the full "Fix execution" patch is setting MAP_JIT
>>> for !splitwx in alloc_code_gen_buffer().
>>>
>>
>> Richard, thanks for updating the patch. I have no objections against
>> moving the functions and inlining them. However I'm seeing an issue that
>> wasn't present in v3:
>>
>> Process 37109 stopped                                                                                                                          * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
>>     frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]                                       2528 TCGOp *tcg_emit_op(TCGOpcode opc)
>>    2529 {                                                                                                                                         2530     TCGOp *op = tcg_op_alloc(opc);
>> -> 2531     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
>>    2532     return op;
>>    2533 }
>>    2534
>> Target 0: (qemu-system-x86_64) stopped.
>> (lldb) bt
>> * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
>>   * frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
>>     frame #1: 0x000000010026f040 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3(opc=INDEX_op_add_i64, a1=4430334952, a2=4430333440,
>> a3=4430361496) at tcg-op.c:60:17 [opt]
>>     frame #2: 0x000000010026f038 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3_i64(opc=INDEX_op_add_i64, a1=<unavailable>, a2=<unav
>> ailable>, a3=<unavailable>) at tcg-op.h:94 [opt]
>>     frame #3: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_add_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<una
>> vailable>) at tcg-op.h:618 [opt]
>>     frame #4: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.c:123
>> 5 [opt]
>>     frame #5: 0x000000010021d1e0 qemu-system-x86_64`gen_lea_modrm_1(s=<unavailable>, a=(def_seg = 2, base = 5, index = -1, scale = 0, disp = -6
>> 89)) at translate.c:2101:9 [opt]
>>     frame #6: 0x000000010020eeec qemu-system-x86_64`disas_insn [inlined] gen_lea_modrm(env=0x0000000118610870, s=0x00000001700b6b00, modrm=<una
>> vailable>) at translate.c:2111:15 [opt]
>>     frame #7: 0x000000010020eec0 qemu-system-x86_64`disas_insn(s=0x00000001700b6b00, cpu=<unavailable>) at translate.c:5509 [opt]
>>     frame #8: 0x000000010020bb44 qemu-system-x86_64`i386_tr_translate_insn(dcbase=0x00000001700b6b00, cpu=<unavailable>) at translate.c:8573:15
>>  [opt]
>>     frame #9: 0x00000001002fbcf8 qemu-system-x86_64`translator_loop(ops=0x0000000100b209c8, db=0x00000001700b6b00, cpu=0x0000000118608000, tb=0
>> x0000000120017200, max_insns=512) at translator.c:0 [opt]
>>     frame #10: 0x000000010020b73c qemu-system-x86_64`gen_intermediate_code(cpu=<unavailable>, tb=<unavailable>, max_insns=<unavailable>) at tra
>> nslate.c:8635:5 [opt]
>>     frame #11: 0x0000000100257970 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118608000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16
>> 777216) at translate-all.c:1931:5 [opt]
>>     frame #12: 0x00000001002deb90 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118608000, last_tb=0x0000000000000000, tb_exit=<un
>> available>, cf_mask=0) at cpu-exec.c:456:14 [opt]
>>     frame #13: 0x00000001002deb54 qemu-system-x86_64`cpu_exec(cpu=0x0000000118608000) at cpu-exec.c:812 [opt]
>>     frame #14: 0x00000001002bc0d0 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118608000) at tcg-cpus.c:57:11 [opt]
>>     frame #15: 0x000000010024c2cc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
>>     frame #16: 0x00000001004b00b4 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
>>     frame #17: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320
>>
>> I'm looking into the issue but perhaps we'll need v5.
>>
>
> Nope. The issue is not directly related to the patch and W^X. I think it
> can be applied.
>
> tcg_ctx->ops is somehow getting corrupted despite it's initialized
> properly during TCG start:
>
> (lldb) p tcg_ctx->ops
> (TCGContext::(anonymous union)) $18 = {
>  tqh_first = 0x0000008401010000
>  tqh_circ = {
>    tql_next = 0x0000008401010000
>    tql_prev = 0xfffffffffffffd4f
>  }
> }
>
> I've bisected it (with v3 of Apple Silicon fix for TCG) to:
>
> commit 8fe35e0444be88de4e3ab80a2a0e210a1f6d663d
> Author: Richard Henderson <richard.henderson@linaro.org>
> Date:   Mon Mar 30 20:42:43 2020 -0700
>
>    tcg/optimize: Use tcg_constant_internal with constant folding

Yes, known problem, see https://bugs.launchpad.net/qemu/+bug/1912065

Regards,
BALATON Zoltan

>    Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>
> tcg/optimize.c | 108 ++++++++++++++++++++++++++-------------------------------
> 1 file changed, 49 insertions(+), 59 deletions(-)
>
> Although, it crashes with a bit different backtrace than provided above:
>
> Process 17251 stopped
> * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
>    frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
>   65
>   66   static inline bool ts_is_copy(TCGTemp *ts)
>   67   {
> -> 68       return ts_info(ts)->next_copy != ts;
>   69   }
>   70
>   71   /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
> Target 0: (qemu-system-x86_64) stopped.
> (lldb) bt
> * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
>  * frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
>    frame #1: 0x00000001002727a4 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_are_copies(ts1=0x00000001202e8e50, ts2=0x00000001202ef398) at optimize.c:163 [opt]
>    frame #2: 0x000000010027278c qemu-system-x86_64`tcg_opt_gen_mov(s=0x00000001202e8000, op=0x0000000119157710, dst=4834889296, src=4834915224) at optimize.c:191 [opt]
>    frame #3: 0x0000000100271740 qemu-system-x86_64`tcg_optimize(s=<unavailable>) at optimize.c:0:9 [opt]
>    frame #4: 0x00000001002f39c4 qemu-system-x86_64`tcg_gen_code(s=0x00000001202e8000, tb=0x0000000128020800) at tcg.c:4407:5 [opt]
>    frame #5: 0x00000001002b2688 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118428000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16777216) at translate-all.c:1961:21 [opt]
>    frame #6: 0x0000000100279460 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118428000, last_tb=0x0000000000000000, tb_exit=<unavailable>, cf_mask=0) at cpu-exec.c:456:14 [opt]
>    frame #7: 0x0000000100279424 qemu-system-x86_64`cpu_exec(cpu=0x0000000118428000) at cpu-exec.c:812 [opt]
>    frame #8: 0x000000010026ea74 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118428000) at tcg-cpus.c:57:11 [opt]
>    frame #9: 0x0000000100284efc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
>    frame #10: 0x00000001004cffe8 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
>    frame #11: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320
>
> The problem is that ts2 in ts_are_copies() contains NULL state_ptr:
>
> (lldb) p *ts2
> (TCGTemp) $2 = {
>  reg = TCG_REG_X0
>  val_type = TEMP_VAL_DEAD
>  base_type = TCG_TYPE_I64
>  type = TCG_TYPE_I64
>  kind = TEMP_CONST
>  indirect_reg = 0
>  indirect_base = 0
>  mem_coherent = 0
>  mem_allocated = 0
>  temp_allocated = 1
>  val = -690
>  mem_base = 0x0000000000000000
>  mem_offset = 0
>  name = 0x0000000000000000
>  state = 0
>  state_ptr = 0x0000000000000000
> }
>
> -Roman
>
>
Roman Bolshakov Jan. 23, 2021, 6:49 p.m. UTC | #6
On Sat, Jan 23, 2021 at 07:33:37PM +0100, BALATON Zoltan wrote:
> On Sat, 23 Jan 2021, Roman Bolshakov wrote:
> > On Sat, Jan 23, 2021 at 02:53:49PM +0300, Roman Bolshakov wrote:
> > > On Thu, Jan 21, 2021 at 08:47:52AM -1000, Richard Henderson wrote:
> > > > From: Roman Bolshakov <r.bolshakov@yadro.com>
> > > > 
> > > > Pages can't be both write and executable at the same time on Apple
> > > > Silicon. macOS provides public API to switch write protection [1] for
> > > > JIT applications, like TCG.
> > > > 
> > > > 1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon
> > > > 
> > > > Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
> > > > Message-Id: <20210113032806.18220-1-r.bolshakov@yadro.com>
> > > > [rth: Inline the qemu_thread_jit_* functions;
> > > >  drop the MAP_JIT change for a follow-on patch.]
> > > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > > > ---
> > > > 
> > > > Supercedes: <20210113032806.18220-1-r.bolshakov@yadro.com>
> > > > 
> > > > This is the version of Roman's patch that I'm queuing to tcg-next.
> > > > What's missing from the full "Fix execution" patch is setting MAP_JIT
> > > > for !splitwx in alloc_code_gen_buffer().
> > > > 
> > > 
> > > Richard, thanks for updating the patch. I have no objections against
> > > moving the functions and inlining them. However I'm seeing an issue that
> > > wasn't present in v3:
> > > 
> > > Process 37109 stopped
> > > * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
> > >     frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
> > >    2528 TCGOp *tcg_emit_op(TCGOpcode opc)
> > >    2529 {
> > >    2530     TCGOp *op = tcg_op_alloc(opc);
> > > -> 2531     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
> > >    2532     return op;
> > >    2533 }
> > >    2534
> > > Target 0: (qemu-system-x86_64) stopped.
> > > (lldb) bt
> > > * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0xfffffffffffffd4f)
> > >   * frame #0: 0x00000001002f1c90 qemu-system-x86_64`tcg_emit_op(opc=INDEX_op_add_i64) at tcg.c:2531:5 [opt]
> > >     frame #1: 0x000000010026f040 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3(opc=INDEX_op_add_i64, a1=4430334952, a2=4430333440, a3=4430361496) at tcg-op.c:60:17 [opt]
> > >     frame #2: 0x000000010026f038 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_op3_i64(opc=INDEX_op_add_i64, a1=<unavailable>, a2=<unavailable>, a3=<unavailable>) at tcg-op.h:94 [opt]
> > >     frame #3: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64 [inlined] tcg_gen_add_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.h:618 [opt]
> > >     frame #4: 0x000000010026f030 qemu-system-x86_64`tcg_gen_addi_i64(ret=<unavailable>, arg1=<unavailable>, arg2=<unavailable>) at tcg-op.c:1235 [opt]
> > >     frame #5: 0x000000010021d1e0 qemu-system-x86_64`gen_lea_modrm_1(s=<unavailable>, a=(def_seg = 2, base = 5, index = -1, scale = 0, disp = -689)) at translate.c:2101:9 [opt]
> > >     frame #6: 0x000000010020eeec qemu-system-x86_64`disas_insn [inlined] gen_lea_modrm(env=0x0000000118610870, s=0x00000001700b6b00, modrm=<unavailable>) at translate.c:2111:15 [opt]
> > >     frame #7: 0x000000010020eec0 qemu-system-x86_64`disas_insn(s=0x00000001700b6b00, cpu=<unavailable>) at translate.c:5509 [opt]
> > >     frame #8: 0x000000010020bb44 qemu-system-x86_64`i386_tr_translate_insn(dcbase=0x00000001700b6b00, cpu=<unavailable>) at translate.c:8573:15 [opt]
> > >     frame #9: 0x00000001002fbcf8 qemu-system-x86_64`translator_loop(ops=0x0000000100b209c8, db=0x00000001700b6b00, cpu=0x0000000118608000, tb=0x0000000120017200, max_insns=512) at translator.c:0 [opt]
> > >     frame #10: 0x000000010020b73c qemu-system-x86_64`gen_intermediate_code(cpu=<unavailable>, tb=<unavailable>, max_insns=<unavailable>) at translate.c:8635:5 [opt]
> > >     frame #11: 0x0000000100257970 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118608000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16777216) at translate-all.c:1931:5 [opt]
> > >     frame #12: 0x00000001002deb90 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118608000, last_tb=0x0000000000000000, tb_exit=<unavailable>, cf_mask=0) at cpu-exec.c:456:14 [opt]
> > >     frame #13: 0x00000001002deb54 qemu-system-x86_64`cpu_exec(cpu=0x0000000118608000) at cpu-exec.c:812 [opt]
> > >     frame #14: 0x00000001002bc0d0 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118608000) at tcg-cpus.c:57:11 [opt]
> > >     frame #15: 0x000000010024c2cc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
> > >     frame #16: 0x00000001004b00b4 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
> > >     frame #17: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320
> > > 
> > > I'm looking into the issue but perhaps we'll need v5.
> > > 
> > 
> > Nope. The issue is not directly related to the patch and W^X. I think it
> > can be applied.
> > 
> > tcg_ctx->ops is somehow getting corrupted even though it is initialized
> > properly during TCG start:
> > 
> > (lldb) p tcg_ctx->ops
> > (TCGContext::(anonymous union)) $18 = {
> >  tqh_first = 0x0000008401010000
> >  tqh_circ = {
> >    tql_next = 0x0000008401010000
> >    tql_prev = 0xfffffffffffffd4f
> >  }
> > }
> > 
> > I've bisected it (with v3 of Apple Silicon fix for TCG) to:
> > 
> > commit 8fe35e0444be88de4e3ab80a2a0e210a1f6d663d
> > Author: Richard Henderson <richard.henderson@linaro.org>
> > Date:   Mon Mar 30 20:42:43 2020 -0700
> > 
> >    tcg/optimize: Use tcg_constant_internal with constant folding
> 
> Yes, known problem, see https://bugs.launchpad.net/qemu/+bug/1912065
> 

Thanks for providing the link, Zoltan.

-Roman

> Regards,
> BALATON Zoltan
> 
> >    Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > 
> > tcg/optimize.c | 108 ++++++++++++++++++++++++++-------------------------------
> > 1 file changed, 49 insertions(+), 59 deletions(-)
> > 
> > Although, it crashes with a bit different backtrace than provided above:
> > 
> > Process 17251 stopped
> > * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
> >    frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
> >   65
> >   66   static inline bool ts_is_copy(TCGTemp *ts)
> >   67   {
> > -> 68       return ts_info(ts)->next_copy != ts;
> >   69   }
> >   70
> >   71   /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
> > Target 0: (qemu-system-x86_64) stopped.
> > (lldb) bt
> > * thread #6, stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
> >  * frame #0: 0x00000001002727a8 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_is_copy(ts=<unavailable>) at optimize.c:68:25 [opt]
> >    frame #1: 0x00000001002727a4 qemu-system-x86_64`tcg_opt_gen_mov [inlined] ts_are_copies(ts1=0x00000001202e8e50, ts2=0x00000001202ef398) at optimize.c:163 [opt]
> >    frame #2: 0x000000010027278c qemu-system-x86_64`tcg_opt_gen_mov(s=0x00000001202e8000, op=0x0000000119157710, dst=4834889296, src=4834915224) at optimize.c:191 [opt]
> >    frame #3: 0x0000000100271740 qemu-system-x86_64`tcg_optimize(s=<unavailable>) at optimize.c:0:9 [opt]
> >    frame #4: 0x00000001002f39c4 qemu-system-x86_64`tcg_gen_code(s=0x00000001202e8000, tb=0x0000000128020800) at tcg.c:4407:5 [opt]
> >    frame #5: 0x00000001002b2688 qemu-system-x86_64`tb_gen_code(cpu=0x0000000118428000, pc=<unavailable>, cs_base=0, flags=4194483, cflags=-16777216) at translate-all.c:1961:21 [opt]
> >    frame #6: 0x0000000100279460 qemu-system-x86_64`cpu_exec [inlined] tb_find(cpu=0x0000000118428000, last_tb=0x0000000000000000, tb_exit=<unavailable>, cf_mask=0) at cpu-exec.c:456:14 [opt]
> >    frame #7: 0x0000000100279424 qemu-system-x86_64`cpu_exec(cpu=0x0000000118428000) at cpu-exec.c:812 [opt]
> >    frame #8: 0x000000010026ea74 qemu-system-x86_64`tcg_cpus_exec(cpu=0x0000000118428000) at tcg-cpus.c:57:11 [opt]
> >    frame #9: 0x0000000100284efc qemu-system-x86_64`rr_cpu_thread_fn(arg=<unavailable>) at tcg-cpus-rr.c:217:21 [opt]
> >    frame #10: 0x00000001004cffe8 qemu-system-x86_64`qemu_thread_start(args=<unavailable>) at qemu-thread-posix.c:521:9 [opt]
> >    frame #11: 0x0000000191c4d06c libsystem_pthread.dylib`_pthread_start + 320
> > 
> > The problem is that ts2 in ts_are_copies() contains NULL state_ptr:
> > 
> > (lldb) p *ts2
> > (TCGTemp) $2 = {
> >  reg = TCG_REG_X0
> >  val_type = TEMP_VAL_DEAD
> >  base_type = TCG_TYPE_I64
> >  type = TCG_TYPE_I64
> >  kind = TEMP_CONST
> >  indirect_reg = 0
> >  indirect_base = 0
> >  mem_coherent = 0
> >  mem_allocated = 0
> >  temp_allocated = 1
> >  val = -690
> >  mem_base = 0x0000000000000000
> >  mem_offset = 0
> >  name = 0x0000000000000000
> >  state = 0
> >  state_ptr = 0x0000000000000000
> > }
> > 
> > -Roman
> > 
> >
diff mbox series

Patch

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index a434382c58..b6ffdc15bf 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -119,6 +119,10 @@  extern int daemon(int, int);
 #include "sysemu/os-posix.h"
 #endif
 
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
 #include "glib-compat.h"
 #include "qemu/typedefs.h"
 
@@ -682,4 +686,28 @@  char *qemu_get_host_name(Error **errp);
  */
 size_t qemu_get_host_physmem(void);
 
+/*
+ * Toggle write/execute on the pages marked MAP_JIT
+ * for the current thread.
+ */
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+static inline void qemu_thread_jit_execute(void)
+{
+    if (__builtin_available(macOS 11.0, *)) {
+        pthread_jit_write_protect_np(true);
+    }
+}
+
+static inline void qemu_thread_jit_write(void)
+{
+    if (__builtin_available(macOS 11.0, *)) {
+        pthread_jit_write_protect_np(false);
+    }
+}
+#else
+static inline void qemu_thread_jit_write(void) {}
+static inline void qemu_thread_jit_execute(void) {}
+#endif
+
 #endif
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 37d17c8e88..6d017e46dd 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -186,6 +186,7 @@  cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
     }
 #endif /* DEBUG_DISAS */
 
+    qemu_thread_jit_execute();
     ret = tcg_qemu_tb_exec(env, tb_ptr);
     cpu->can_do_io = 1;
     /*
@@ -410,6 +411,7 @@  static inline void tb_add_jump(TranslationBlock *tb, int n,
 {
     uintptr_t old;
 
+    qemu_thread_jit_write();
     assert(n < ARRAY_SIZE(tb->jmp_list_next));
     qemu_spin_lock(&tb_next->jmp_lock);
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 73fef47148..d09c187e0f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1670,7 +1670,9 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
 static void tb_phys_invalidate__locked(TranslationBlock *tb)
 {
+    qemu_thread_jit_write();
     do_tb_phys_invalidate(tb, true);
+    qemu_thread_jit_execute();
 }
 
 /* invalidate one TB
@@ -1872,6 +1874,7 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
 #endif
 
     assert_memory_lock();
+    qemu_thread_jit_write();
 
     phys_pc = get_page_addr_code(env, pc);
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5110f6f39c..4d734130df 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1112,6 +1112,7 @@  void tcg_prologue_init(TCGContext *s)
     s->pool_labels = NULL;
 #endif
 
+    qemu_thread_jit_write();
     /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);