Message ID | 1460044433-19282-6-git-send-email-sergey.fedorov@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Sergey Fedorov <sergey.fedorov@linaro.org> writes: > From: Sergey Fedorov <serge.fdrv@gmail.com> > > Ensure direct jump patching in i386 is atomic by: > * naturally aligning a location of direct jump address; > * using atomic_read()/atomic_set() for code patching. > > Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com> > Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org> > --- > include/exec/exec-all.h | 2 +- > tcg/i386/tcg-target.inc.c | 17 +++++++++++++++++ > 2 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 59709c9dd5c9..82399175fe80 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -312,7 +312,7 @@ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); > static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) > { > /* patch the branch destination */ > - stl_le_p((void*)jmp_addr, addr - (jmp_addr + 4)); > + atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); > /* no need to flush icache explicitly */ > } > #elif defined(__s390x__) > diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c > index 9187d34caf6d..3ffb7b3124d8 100644 > --- a/tcg/i386/tcg-target.inc.c > +++ b/tcg/i386/tcg-target.inc.c > @@ -1123,6 +1123,19 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) > tcg_out_branch(s, 0, dest); > } > > +static void tcg_out_nopn(TCGContext *s, int n) > +{ > + static const uint8_t nop1[] = { 0x90 }; > + static const uint8_t nop2[] = { 0x66, 0x90 }; > + static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 }; > + static const uint8_t *const nopn[] = { nop1, nop2, nop3 }; > + int i; > + assert(n <= ARRAY_SIZE(nopn)); > + for (i = 0; i < n; ++i) { > + tcg_out8(s, nopn[n - 1][i]); > + } > +} *shudder* I recall x86 instruction encoding is weird. Maybe a comment for the function to describe the 3 forms of NOP we have here? > + > #if defined(CONFIG_SOFTMMU) > /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, > * int mmu_idx, uintptr_t ra) > @@ -1777,6 +1790,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > case INDEX_op_goto_tb: > if (s->tb_jmp_offset) { > /* direct jump method */ > + /* align jump displacement for atomic pathing */ s/pathing/patching/ > + if (((uintptr_t)s->code_ptr & 3) != 3) { > + tcg_out_nopn(s, 3 - ((uintptr_t)s->code_ptr & 3)); > + } apropos my previous comments. I think the intention could be made clearer the use of well named helper functions to check alignment and calculate number elements until next alignment. > tcg_out8(s, OPC_JMP_long); /* jmp im */ > s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); > tcg_out32(s, 0); -- Alex Bennée
On 20/04/16 12:55, Alex Bennée wrote: > Sergey Fedorov <sergey.fedorov@linaro.org> writes: >> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c >> index 9187d34caf6d..3ffb7b3124d8 100644 >> --- a/tcg/i386/tcg-target.inc.c >> +++ b/tcg/i386/tcg-target.inc.c >> @@ -1123,6 +1123,19 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) >> tcg_out_branch(s, 0, dest); >> } >> >> +static void tcg_out_nopn(TCGContext *s, int n) >> +{ >> + static const uint8_t nop1[] = { 0x90 }; >> + static const uint8_t nop2[] = { 0x66, 0x90 }; >> + static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 }; >> + static const uint8_t *const nopn[] = { nop1, nop2, nop3 }; >> + int i; >> + assert(n <= ARRAY_SIZE(nopn)); >> + for (i = 0; i < n; ++i) { >> + tcg_out8(s, nopn[n - 1][i]); >> + } >> +} > *shudder* I recall x86 instruction encoding is weird. Maybe a comment > for the function to describe the 3 forms of NOP we have here? Okay. > >> + >> #if defined(CONFIG_SOFTMMU) >> /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, >> * int mmu_idx, uintptr_t ra) >> @@ -1777,6 +1790,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, >> case INDEX_op_goto_tb: >> if (s->tb_jmp_offset) { >> /* direct jump method */ >> + /* align jump displacement for atomic pathing */ > s/pathing/patching/ Nice catch, thanks :) Kind regards, Sergey
On 04/20/2016 02:55 AM, Alex Bennée wrote: >> +static void tcg_out_nopn(TCGContext *s, int n) >> +{ >> + static const uint8_t nop1[] = { 0x90 }; >> + static const uint8_t nop2[] = { 0x66, 0x90 }; >> + static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 }; >> + static const uint8_t *const nopn[] = { nop1, nop2, nop3 }; >> + int i; >> + assert(n <= ARRAY_SIZE(nopn)); >> + for (i = 0; i < n; ++i) { >> + tcg_out8(s, nopn[n - 1][i]); >> + } >> +} > > *shudder* I recall x86 instruction encoding is weird. Maybe a comment > for the function to describe the 3 forms of NOP we have here? I think I'd prefer to drop the tables and do /* Emit 1 or 2 operand size prefixes for the standard one byte nop, xchg %eax,%eax, forming xchg %ax,%ax. All cores accept the duplicate prefix, and all of the interesting recent cores can decode and discard the duplicates in a single cycle. */ for (i = 1; i < n; ++i) { tcg_out8(s, 0x66); } tcg_out8(s, 0x90); r~
On 20/04/16 18:04, Richard Henderson wrote: > On 04/20/2016 02:55 AM, Alex Bennée wrote: >>> +static void tcg_out_nopn(TCGContext *s, int n) >>> +{ >>> + static const uint8_t nop1[] = { 0x90 }; >>> + static const uint8_t nop2[] = { 0x66, 0x90 }; >>> + static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 }; >>> + static const uint8_t *const nopn[] = { nop1, nop2, nop3 }; >>> + int i; >>> + assert(n <= ARRAY_SIZE(nopn)); >>> + for (i = 0; i < n; ++i) { >>> + tcg_out8(s, nopn[n - 1][i]); >>> + } >>> +} >> >> *shudder* I recall x86 instruction encoding is weird. Maybe a comment >> for the function to describe the 3 forms of NOP we have here? > > I think I'd prefer to drop the tables and do > > /* Emit 1 or 2 operand size prefixes for the standard one byte nop, > xchg %eax,%eax, forming xchg %ax,%ax. All cores accept the > duplicate prefix, and all of the interesting recent cores can > decode and discard the duplicates in a single cycle. */ > for (i = 1; i < n; ++i) { > tcg_out8(s, 0x66); > } > tcg_out8(s, 0x90); It's fine if you are sure about that :) Kind regards, Sergey
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 59709c9dd5c9..82399175fe80 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -312,7 +312,7 @@ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) { /* patch the branch destination */ - stl_le_p((void*)jmp_addr, addr - (jmp_addr + 4)); + atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); /* no need to flush icache explicitly */ } #elif defined(__s390x__) diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 9187d34caf6d..3ffb7b3124d8 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1123,6 +1123,19 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) tcg_out_branch(s, 0, dest); } +static void tcg_out_nopn(TCGContext *s, int n) +{ + static const uint8_t nop1[] = { 0x90 }; + static const uint8_t nop2[] = { 0x66, 0x90 }; + static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 }; + static const uint8_t *const nopn[] = { nop1, nop2, nop3 }; + int i; + assert(n <= ARRAY_SIZE(nopn)); + for (i = 0; i < n; ++i) { + tcg_out8(s, nopn[n - 1][i]); + } +} + #if defined(CONFIG_SOFTMMU) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) @@ -1777,6 +1790,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ + /* align jump displacement for atomic pathing */ + if (((uintptr_t)s->code_ptr & 3) != 3) { + tcg_out_nopn(s, 3 - ((uintptr_t)s->code_ptr & 3)); + } tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0);