Message ID | 20190301115413.27153-14-david@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | s390x/tcg: Vector Instruction Support Part 1 | expand |
On 3/1/19 3:53 AM, David Hildenbrand wrote: > + /* > + * Check for possible access exceptions by trying to load the last > + * element. The first element will be checked first next. > + */ > + t = tcg_temp_new_i64(); > + gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8); > + tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ); qemu_ld expands to enough code that it is a shame to discard this value and reload it during this loop. Perhaps load this to t2... > + > + for (;; v1++) { > + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); > + write_vec_element_i64(t, v1, 0, ES_64); Move v1 == v3 break here... > + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); > + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); > + write_vec_element_i64(t, v1, 1, ES_64); > + if (v1 == v3) { > + break; > + } > + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); > + } ... and store t2 into v3 element 1 after the loop. r~
On 01.03.19 17:26, Richard Henderson wrote: > On 3/1/19 3:53 AM, David Hildenbrand wrote: >> + /* >> + * Check for possible access exceptions by trying to load the last >> + * element. The first element will be checked first next. >> + */ >> + t = tcg_temp_new_i64(); >> + gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8); >> + tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ); > > qemu_ld expands to enough code that it is a shame to discard this value and > reload it during this loop. Perhaps load this to t2... > >> + >> + for (;; v1++) { >> + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); >> + write_vec_element_i64(t, v1, 0, ES_64); > > Move v1 == v3 break here... > >> + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); >> + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); >> + write_vec_element_i64(t, v1, 1, ES_64); >> + if (v1 == v3) { >> + break; >> + } >> + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); >> + } > > ... and store t2 into v3 element 1 after the loop. Yes, makes perfect sense, thanks!
On 01.03.19 12:53, David Hildenbrand wrote: > Try to load the last element first. Access to the first element will > be checked afterwards. This way, we can guarantee that the vector is > not modified before we checked for all possible exceptions. (16 vectors > cannot cross more than two pages) > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > target/s390x/insn-data.def | 2 ++ > target/s390x/translate_vx.inc.c | 34 +++++++++++++++++++++++++++++++++ > 2 files changed, 36 insertions(+) > > diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def > index 2b36205c84..fa0f3a9003 100644 > --- a/target/s390x/insn-data.def > +++ b/target/s390x/insn-data.def > @@ -1000,6 +1000,8 @@ > F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC) > /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */ > F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC) > +/* VECTOR LOAD MULTIPLE */ > + F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC) > > #ifndef CONFIG_USER_ONLY > /* COMPARE AND SWAP AND PURGE */ > diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c > index cdad2a52f0..93f9c0f804 100644 > --- a/target/s390x/translate_vx.inc.c > +++ b/target/s390x/translate_vx.inc.c > @@ -406,3 +406,37 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) > tcg_temp_free_i64(t); > return DISAS_NEXT; > } > + > +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o) > +{ > + const uint8_t v3 = get_field(s->fields, v3); > + uint8_t v1 = get_field(s->fields, v1); > + TCGv_i64 t; > + > + while (v3 < v1 || (v3 - v1 + 1) > 16) { while -> if, funny that this works > + gen_program_exception(s, PGM_SPECIFICATION); > + return DISAS_NORETURN; > + } > + > + /* > + * Check for possible access exceptions by trying to load the last > + * element. The first element will be checked first next. > + */ > + t = tcg_temp_new_i64(); > + gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8); > + tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ); > + > + for (;; v1++) { > + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); > + write_vec_element_i64(t, v1, 0, ES_64); > + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); > + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); > + write_vec_element_i64(t, v1, 1, ES_64); > + if (v1 == v3) { > + break; > + } > + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); > + } > + tcg_temp_free_i64(t); > + return DISAS_NEXT; > +} >
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 2b36205c84..fa0f3a9003 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1000,6 +1000,8 @@ F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC) /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */ F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC) +/* VECTOR LOAD MULTIPLE */ + F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index cdad2a52f0..93f9c0f804 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -406,3 +406,37 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) tcg_temp_free_i64(t); return DISAS_NEXT; } + +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s->fields, v3); + uint8_t v1 = get_field(s->fields, v1); + TCGv_i64 t; + + while (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* + * Check for possible access exceptions by trying to load the last + * element. The first element will be checked first next. + */ + t = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8); + tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ); + + for (;; v1++) { + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t, v1, 0, ES_64); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t, v1, 1, ES_64); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + tcg_temp_free_i64(t); + return DISAS_NEXT; +}