@@ -26,9 +26,6 @@ require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(write_test_code);
-my $periodic_reg_random = 1;
-my $enable_aarch64_ld1 = 0;
-
# Note that we always start in ARM mode even if the C code was compiled for
# thumb because we are called by branch to a lsbit-clear pointer.
# is_thumb tracks the mode we're actually currently in (ie should we emit
@@ -45,6 +42,8 @@ my $test_thumb = 0; # should test code be Thumb mode?
# Maximum alignment restriction permitted for a memory op.
my $MAXALIGN = 64;
+# Maximum offset permitted for a memory op.
+my $MEMBLOCKLEN = 8192;
# An instruction pattern as parsed from the config file turns into
# a record like this:
@@ -60,19 +59,11 @@ my $MAXALIGN = 64;
# Valid block names (keys in blocks hash)
my %valid_blockname = ( constraints => 1, memory => 1 );
-# for thumb only
-sub thumb_align4()
-{
- if ($bytecount & 3) {
- insn16(0xbf00); # NOP
- }
-}
-
# used for aarch64 only for now
sub data_barrier()
{
if ($is_aarch64) {
- insn32(0xd5033f9f); # DSB SYS
+ printf "\tdsb\tsy\n";
}
}
@@ -94,23 +85,35 @@ my $OP_SETMEMBLOCK = 2; # r0 is address of memory block (8192 bytes)
my $OP_GETMEMBLOCK = 3; # add the address of memory block to r0
my $OP_COMPAREMEM = 4; # compare memory block
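+# Return the assembler name of general purpose register $reg:
+# "rN" for A32/T32, or "xN" ("xzr" for register 31) for A64.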
+sub xr($)
+{
+ my ($reg) = @_;
+ if (!$is_aarch64) {
+ return "r$reg";
+ } elsif ($reg == 31) {
+ return "xzr";
+ } else {
+ return "x$reg";
+ }
+}
+
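+# RISU ops are not real instructions, so they are emitted as raw
+# .inst values rather than as mnemonics.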
sub write_thumb_risuop($)
{
my ($op) = @_;
- insn16(0xdee0 | $op);
+ printf "\t.inst.n\t%#x\n", 0xdee0 | $op;
}
sub write_arm_risuop($)
{
my ($op) = @_;
- insn32(0xe7fe5af0 | $op);
+ printf "\t.inst\t%#x\n", 0xe7fe5af0 | $op;
}
sub write_aarch64_risuop($)
{
# instr with bits (28:27) == 0 0 are UNALLOCATED
my ($op) = @_;
- insn32(0x00005af0 | $op);
+ printf "\t.inst\t%#x\n", 0x00005af0 | $op;
}
sub write_risuop($)
@@ -125,17 +128,22 @@ sub write_risuop($)
}
}
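+# Emit a 32-bit literal data word into the generated assembly.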
+sub write_data32($)
+{
+ my ($data) = @_;
+ printf "\t.word\t%#08x\n", $data;
+}
+
sub write_switch_to_thumb()
{
# Switch to thumb if we're not already there
if (!$is_thumb) {
- # Note that we have to clean up r0 afterwards
- # so it isn't tainted with a value which depends
- # on PC (and which might differ between hw and
- # qemu/valgrind/etc)
- insn32(0xe28f0001); # add r0, pc, #1
- insn32(0xe12fff10); # bx r0
- insn16(0x4040); # eor r0,r0 (enc T1)
+ # Note that we have to clean up R0 afterwards so it isn't
+ # tainted with a value which depends on PC.
+ printf "\tadd\tr0, pc, #1\n";
+ printf "\tbx\tr0\n";
+ printf ".thumb\n";
+ printf "\teors\tr0, r0\n";
$is_thumb = 1;
}
}
@@ -144,9 +152,10 @@ sub write_switch_to_arm()
{
# Switch to ARM mode if we are in thumb mode
if ($is_thumb) {
- thumb_align4();
- insn16(0x4778); # bx pc
- insn16(0xbf00); # nop
+ printf "\t.balign\t4\n";
+ printf "\tbx\tpc\n";
+ printf "\tnop\n";
+ printf ".arm\n";
$is_thumb = 0;
}
}
@@ -165,122 +174,38 @@ sub write_switch_to_test_mode()
}
}
-# sign extend a 32bit reg into a 64bit reg
-sub write_sxt32($$)
-{
- my ($rd, $rn) = @_;
- die "write_sxt32: invalid operation for this arch.\n" if (!$is_aarch64);
-
- insn32(0x93407c00 | $rn << 5 | $rd);
-}
-
sub write_add_rri($$$)
{
my ($rd, $rn, $i) = @_;
- my $sh;
-
- die "write_add_rri: invalid operation for this arch.\n" if (!$is_aarch64);
-
- if ($i >= 0 && $i < 0x1000) {
- $sh = 0;
- } elsif (($i & 0xfff) || $i >= 0x1000000) {
- die "invalid immediate for this arch,\n";
- } else {
- $sh = 1;
- $i >>= 12;
- }
- insn32(0x91000000 | ($rd << 0) | ($rn << 5) | ($i << 10) | ($sh << 22));
+ printf "\tadd\t%s, %s, #%d\n", xr($rd), xr($rn), $i;
}
sub write_sub_rrr($$$)
{
my ($rd, $rn, $rm) = @_;
-
- if ($is_aarch64) {
- insn32(0xcb000000 | ($rm << 16) | ($rn << 5) | $rd);
-
- } elsif ($is_thumb) {
- # enc T2
- insn16(0xeba0 | $rn);
- insn16(0x0000 | ($rd << 8) | $rm);
- } else {
- # enc A1
- insn32(0xe0400000 | ($rn << 16) | ($rd << 12) | $rm);
- }
+ printf "\tsub\t%s, %s, %s\n", xr($rd), xr($rn), xr($rm);
}
# valid shift types
-my $SHIFT_LSL = 0;
-my $SHIFT_LSR = 1;
-my $SHIFT_ASR = 2;
-my $SHIFT_ROR = 3;
+my $SHIFT_LSL = "lsl";
+my $SHIFT_LSR = "lsr";
+my $SHIFT_ASR = "asr";
+my $SHIFT_ROR = "ror";
sub write_sub_rrrs($$$$$)
{
# sub rd, rn, rm, shifted
my ($rd, $rn, $rm, $type, $imm) = @_;
$type = $SHIFT_LSL if $imm == 0;
- my $bits = $is_aarch64 ? 64 : 32;
- if ($imm == $bits && ($type == $SHIFT_LSR || $type == $SHIFT_ASR)) {
- $imm = 0;
- }
- die "write_sub_rrrs: bad shift immediate $imm\n" if $imm < 0 || $imm > ($bits - 1);
-
- if ($is_aarch64) {
- insn32(0xcb000000 | ($type << 22) | ($rm << 16) | ($imm << 10) | ($rn << 5) | $rd);
-
- } elsif ($is_thumb) {
- # enc T2
- my ($imm3, $imm2) = ($imm >> 2, $imm & 3);
- insn16(0xeba0 | $rn);
- insn16(($imm3 << 12) | ($rd << 8) | ($imm2 << 6) | ($type << 4) | $rm);
- } else {
- # enc A1
- insn32(0xe0400000 | ($rn << 16) | ($rd << 12) | ($imm << 7) | ($type << 5) | $rm);
- }
+ printf "\tsub\t%s, %s, %s, %s #%d\n",
+ xr($rd), xr($rn), xr($rm), $type, $imm;
}
sub write_mov_rr($$)
{
my ($rd, $rm) = @_;
-
- if ($is_aarch64) {
- # using ADD 0x11000000 */
- insn32(0x91000000 | ($rm << 5) | $rd);
-
- } elsif ($is_thumb) {
- # enc T3
- insn16(0xea4f);
- insn16(($rd << 8) | $rm);
- } else {
- # enc A1
- insn32(0xe1a00000 | ($rd << 12) | $rm);
- }
-}
-
-sub write_mov_ri16($$$)
-{
- # Write 16 bits of immediate to register.
- my ($rd, $imm, $is_movt) = @_;
-
- die "write_mov_ri16: invalid operation for this arch.\n" if ($is_aarch64);
- die "write_mov_ri16: immediate $imm out of range\n" if (($imm & 0xffff0000) != 0);
-
- if ($is_thumb) {
- # enc T3
- my ($imm4, $i, $imm3, $imm8) = (($imm & 0xf000) >> 12,
- ($imm & 0x0800) >> 11,
- ($imm & 0x0700) >> 8,
- ($imm & 0x00ff));
- insn16(0xf240 | ($is_movt << 7) | ($i << 10) | $imm4);
- insn16(($imm3 << 12) | ($rd << 8) | $imm8);
- } else {
- # enc A2
- my ($imm4, $imm12) = (($imm & 0xf000) >> 12,
- ($imm & 0x0fff));
- insn32(0xe3000000 | ($is_movt << 22) | ($imm4 << 16) | ($rd << 12) | $imm12);
- }
+ printf "\tmov\t%s, %s\n", xr($rd), xr($rm);
}
sub write_mov_ri($$)
@@ -288,21 +213,21 @@ sub write_mov_ri($$)
my ($rd, $imm) = @_;
my $highhalf = ($imm >> 16) & 0xffff;
- if ($is_aarch64) {
- if ($imm < 0) {
- # MOVN
- insn32(0x92800000 | ((~$imm & 0xffff) << 5) | $rd);
- # MOVK, LSL 16
- insn32(0xf2a00000 | ($highhalf << 5) | $rd) if $highhalf != 0xffff;
- } else {
- # MOVZ
- insn32(0x52800000 | (($imm & 0xffff) << 5) | $rd);
- # MOVK, LSL 16
- insn32(0xf2a00000 | ($highhalf << 5) | $rd) if $highhalf != 0;
+ if (!$is_aarch64) {
+ printf "\tmovw\t%s, #%#x\n", xr($rd), 0xffff & $imm;
+ if ($highhalf != 0) {
+ printf "\tmovt\t%s, #%#x\n", xr($rd), $highhalf;
+ }
+ } elsif ($imm < 0) {
+ printf "\tmovn\t%s, #%#x\n", xr($rd), 0xffff & ~$imm;
+ if ($highhalf != 0xffff) {
+ printf "\tmovk\t%s, #%#x, lsl #16\n", xr($rd), $highhalf;
}
} else {
- write_mov_ri16($rd, ($imm & 0xffff), 0);
- write_mov_ri16($rd, $highhalf, 1) if $highhalf;
+ printf "\tmovz\t%s, #%#x\n", xr($rd), 0xffff & $imm;
+ if ($highhalf != 0) {
+ printf "\tmovk\t%s, #%#x, lsl #16\n", xr($rd), $highhalf;
+ }
}
}
@@ -311,7 +236,7 @@ sub write_addpl_rri($$$)
my ($rd, $rn, $imm) = @_;
die "write_addpl: invalid operation for this arch.\n" if (!$is_aarch64);
- insn32(0x04605000 | ($rn << 16) | (($imm & 0x3f) << 5) | $rd);
+ printf "\taddpl\t%s, %s, #%d\n", xr($rd), xr($rn), $imm;
}
sub write_addvl_rri($$$)
@@ -319,7 +244,7 @@ sub write_addvl_rri($$$)
my ($rd, $rn, $imm) = @_;
die "write_addvl: invalid operation for this arch.\n" if (!$is_aarch64);
- insn32(0x04205000 | ($rn << 16) | (($imm & 0x3f) << 5) | $rd);
+ printf "\taddvl\t%s, %s, #%d\n", xr($rd), xr($rn), $imm;
}
sub write_rdvl_ri($$)
@@ -327,7 +252,7 @@ sub write_rdvl_ri($$)
my ($rd, $imm) = @_;
die "write_rdvl: invalid operation for this arch.\n" if (!$is_aarch64);
- insn32(0x04bf5000 | (($imm & 0x3f) << 5) | $rd);
+ printf "\trdvl\t%s, #%d\n", xr($rd), $imm;
}
sub write_madd_rrrr($$$$)
@@ -335,7 +260,7 @@ sub write_madd_rrrr($$$$)
my ($rd, $rn, $rm, $ra) = @_;
die "write_madd: invalid operation for this arch.\n" if (!$is_aarch64);
- insn32(0x9b000000 | ($rm << 16) | ($ra << 10) | ($rn << 5) | $rd);
+ printf "\tmadd\t%s, %s, %s, %s\n", xr($rd), xr($rn), xr($rm), xr($ra);
}
sub write_msub_rrrr($$$$)
@@ -343,13 +268,14 @@ sub write_msub_rrrr($$$$)
my ($rd, $rn, $rm, $ra) = @_;
die "write_msub: invalid operation for this arch.\n" if (!$is_aarch64);
- insn32(0x9b008000 | ($rm << 16) | ($ra << 10) | ($rn << 5) | $rd);
+ printf "\tmsub\t%s, %s, %s, %s\n", xr($rd), xr($rn), xr($rm), xr($ra);
}
sub write_mul_rrr($$$)
{
my ($rd, $rn, $rm) = @_;
- write_madd_rrrr($rd, $rn, $rm, 31);
+
+ printf "\tmul\t%s, %s, %s\n", xr($rd), xr($rn), xr($rm);
}
# write random fp value of passed precision (1=single, 2=double, 4=quad)
@@ -359,7 +285,7 @@ sub write_random_fpreg_var($)
my $randomize_low = 0;
if ($precision != 1 && $precision != 2 && $precision != 4) {
- die "write_random_fpreg: invalid precision.\n";
+ die "write_random_fpreg: invalid precision.\n";
}
my ($low, $high);
@@ -371,7 +297,7 @@ sub write_random_fpreg_var($)
} elsif ($r < 10) {
# NaN (5%)
# (plus a tiny chance of generating +-Inf)
- $randomize_low = 1;
+ $randomize_low = 1;
$high = rand(0xffffffff) | 0x7ff00000;
} elsif ($r < 15) {
# Infinity (5%)
@@ -381,83 +307,22 @@ sub write_random_fpreg_var($)
} elsif ($r < 30) {
# Denormalized number (15%)
# (plus tiny chance of +-0)
- $randomize_low = 1;
+ $randomize_low = 1;
$high = rand(0xffffffff) & ~0x7ff00000;
} else {
# Normalized number (70%)
# (plus a small chance of the other cases)
- $randomize_low = 1;
+ $randomize_low = 1;
$high = rand(0xffffffff);
}
for (my $i = 1; $i < $precision; $i++) {
- if ($randomize_low) {
- $low = rand(0xffffffff);
- }
- insn32($low);
+ if ($randomize_low) {
+ $low = rand(0xffffffff);
+ }
+ printf "\t.word\t%#08x\n", $low;
}
- insn32($high);
-}
-
-sub write_random_double_fpreg()
-{
- my ($low, $high);
- my $r = rand(100);
- if ($r < 5) {
- # +-0 (5%)
- $low = $high = 0;
- $high |= 0x80000000 if (rand() < 0.5);
- } elsif ($r < 10) {
- # NaN (5%)
- # (plus a tiny chance of generating +-Inf)
- $low = rand(0xffffffff);
- $high = rand(0xffffffff) | 0x7ff00000;
- } elsif ($r < 15) {
- # Infinity (5%)
- $low = 0;
- $high = 0x7ff00000;
- $high |= 0x80000000 if (rand() < 0.5);
- } elsif ($r < 30) {
- # Denormalized number (15%)
- # (plus tiny chance of +-0)
- $low = rand(0xffffffff);
- $high = rand(0xffffffff) & ~0x7ff00000;
- } else {
- # Normalized number (70%)
- # (plus a small chance of the other cases)
- $low = rand(0xffffffff);
- $high = rand(0xffffffff);
- }
- insn32($low);
- insn32($high);
-}
-
-sub write_random_single_fpreg()
-{
- my ($value);
- my $r = rand(100);
- if ($r < 5) {
- # +-0 (5%)
- $value = 0;
- $value |= 0x80000000 if (rand() < 0.5);
- } elsif ($r < 10) {
- # NaN (5%)
- # (plus a tiny chance of generating +-Inf)
- $value = rand(0xffffffff) | 0x7f800000;
- } elsif ($r < 15) {
- # Infinity (5%)
- $value = 0x7f800000;
- $value |= 0x80000000 if (rand() < 0.5);
- } elsif ($r < 30) {
- # Denormalized number (15%)
- # (plus tiny chance of +-0)
- $value = rand(0xffffffff) & ~0x7f800000;
- } else {
- # Normalized number (70%)
- # (plus a small chance of the other cases)
- $value = rand(0xffffffff);
- }
- insn32($value);
+ printf "\t.word\t%#08x\n", $high;
}
sub write_random_arm_fpreg()
@@ -479,110 +344,92 @@ sub write_random_arm_fpreg()
sub write_random_arm_regdata($)
{
my ($fp_enabled) = @_;
- # TODO hardcoded, also no d16-d31 initialisation
my $vfp = $fp_enabled ? 2 : 0; # 0 : no vfp, 1 : vfpd16, 2 : vfpd32
write_switch_to_arm();
-
+
# initialise all registers
- if ($vfp == 1) {
- insn32(0xe28f0008); # add r0, pc, #8
- insn32(0xecb00b20); # vldmia r0!, {d0-d15}
- } elsif ($vfp == 2) {
- insn32(0xe28f000c); # add r0, pc, #12
- insn32(0xecb00b20); # vldmia r0!, {d0-d15}
- insn32(0xecf00b20); # vldmia r0!, {d16-d31}
- } else {
- insn32(0xe28f0004); # add r0, pc, #4
- }
-
- insn32(0xe8905fff); # ldmia r0, {r0-r12,r14}
- my $datalen = 14;
- $datalen += (32 * $vfp);
- insn32(0xea000000 + ($datalen-1)); # b next
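+ # Put the address of the inline data block (label 0) in r0 and
+ # branch over it to label 1, where the data is loaded into the
+ # FP and core registers.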
+ printf "\tadr\tr0, 0f\n";
+ printf "\tb\t1f\n";
+
+ printf "\t.balign %d\n", $fp_enabled ? 8 : 4;
+ printf "0:\n";
+
for (0..(($vfp * 16) - 1)) { # NB: never done for $vfp == 0
write_random_arm_fpreg();
}
# .word [14 words of data for r0..r12,r14]
for (0..13) {
- insn32(rand(0xffffffff));
+ write_data32(rand(0xffffffff));
}
- # next:
- # clear the flags (NZCVQ and GE): msr APSR_nzcvqg, #0
- insn32(0xe32cf000);
+
+ printf "1:\n";
+ if ($vfp == 1) {
+ printf "\tvldmia\tr0!, {d0-d15}\n";
+ } elsif ($vfp == 2) {
+ printf "\tvldmia\tr0!, {d0-d15}\n";
+ printf "\tvldmia\tr0!, {d16-d31}\n";
+ }
+ printf "\tldmia\tr0, {r0-r12,r14}\n";
+
+ # clear the flags (NZCVQ and GE)
+ printf "\tmsr\tAPSR_nzcvqg, #0\n";
}
sub write_random_aarch64_fpdata()
{
# load floating point / SIMD registers
- my $align = 16;
- my $datalen = 32 * 16 + $align;
- write_pc_adr(0, (3 * 4) + ($align - 1)); # insn 1
- write_align_reg(0, $align); # insn 2
- write_jump_fwd($datalen); # insn 3
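+ # Emit 32 random quadwords into .data at local label 1, then load
+ # them into v0-v31, four registers per LD1.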
+ printf "\t.data\n";
+ printf "\t.balign\t16\n";
+ printf "1:\n";
- # align safety
- for (my $i = 0; $i < ($align / 4); $i++) {
- insn32(rand(0xffffffff));
- };
-
- for (my $rt = 0; $rt <= 31; $rt++) {
- write_random_fpreg_var(4); # quad
+ for (0..31) {
+ write_random_fpreg_var(4); # quad
}
- if ($enable_aarch64_ld1) {
- # enable only when we have ld1
- for (my $rt = 0; $rt <= 31; $rt += 4) {
- insn32(0x4cdf2c00 | $rt); # ld1 {v0.2d-v3.2d}, [x0], #64
- }
- } else {
- # temporarily use LDP instead
- for (my $rt = 0; $rt <= 31; $rt += 2) {
- insn32(0xacc10000 | ($rt + 1) << 10 | ($rt)); # ldp q0,q1,[x0],#32
- }
+ printf "\t.text\n";
+ printf "\tadr\tx0, 1b\n";
+
+ for (my $rt = 0; $rt < 32; $rt += 4) {
+ printf "\tld1\t{v%d.2d-v%d.2d}, [x0], #64\n", $rt, $rt + 3;
}
}
sub write_random_aarch64_svedata()
{
+ # Max SVE size
+ my $vq = 16;
+
# Load SVE registers
- my $align = 16;
- my $vq = 16; # quadwords per vector
- my $veclen = 32 * $vq * 16;
- my $predlen = 16 * $vq * 2;
- my $datalen = $veclen + $predlen;
+ printf "\t.data\n";
+ printf "\t.balign\t16\n";
+ printf "1:\n";
- write_pc_adr(0, 2 * 4); # insn 1
- write_jump_fwd($datalen); # insn 2
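+ # Data layout: 32 Z registers of $vq quadwords each, followed by
+ # 16 predicate registers of 2 * $vq bytes each.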
+ for (my $i = 0; $i < 32 * 16 * $vq; $i += 16) {
+ write_random_fpreg_var(4); # quad
+ }
+ for (my $i = 0; $i < 16 * 2 * $vq; $i += 4) {
+ write_data32(rand(0xffffffff));
+ }
+
+ printf "\t.text\n";
+ printf "\tadr\tx0, 1b\n";
for (my $rt = 0; $rt <= 31; $rt++) {
- for (my $q = 0; $q < $vq; $q++) {
- write_random_fpreg_var(4); # quad
- }
+ printf "\tldr\tz%d, [x0, #%d, mul vl]\n", $rt, $rt;
}
- for (my $rt = 0; $rt <= 15; $rt++) {
- for (my $q = 0; $q < $vq; $q++) {
- insn16(rand(0xffff));
- }
- }
-
- for (my $rt = 0; $rt <= 31; $rt++) {
- # ldr z$rt, [x0, #$rt, mul vl]
- insn32(0x85804000 + $rt + (($rt & 7) << 10) + (($rt & 0x18) << 13));
- }
-
- write_add_rri(0, 0, $veclen);
+ write_add_rri(0, 0, 32 * 16 * $vq);
for (my $rt = 0; $rt <= 15; $rt++) {
- # ldr p$rt, [x0, #$pt, mul vl]
- insn32(0x85800000 + $rt + (($rt & 7) << 10) + (($rt & 0x18) << 13));
+ printf "\tldr\tp%d, [x0, #%d, mul vl]\n", $rt, $rt;
}
}
sub write_random_aarch64_regdata($$)
{
my ($fp_enabled, $sve_enabled) = @_;
+
# clear flags
- insn32(0xd51b421f); # msr nzcv, xzr
+ printf "\tmsr\tnzcv, xzr\n";
# Load floating point / SIMD registers
# (one or the other as they overlap)
@@ -612,65 +459,6 @@ sub write_random_register_data($$)
write_risuop($OP_COMPARE);
}
-# put PC + offset into a register.
-# this must emit an instruction of 4 bytes.
-sub write_pc_adr($$)
-{
- my ($rd, $imm) = @_;
-
- if ($is_aarch64) {
- # C2.3.5 PC-relative address calculation
- # The ADR instruction adds a signed, 21-bit value of the pc that fetched this instruction,
- my ($immhi, $immlo) = ($imm >> 2, $imm & 0x3);
- insn32(0x10000000 | $immlo << 29 | $immhi << 5 | $rd);
- } else {
- # A.2.3 ARM Core Registers:
- # When executing an ARM instruction, PC reads as the address of the current insn plus 8.
- $imm -= 8;
- insn32(0xe28f0000 | $rd << 12 | $imm);
-
- }
-}
-
-# clear bits in register to satisfy alignment.
-# Must use exactly 4 instruction-bytes (one instruction on arm)
-sub write_align_reg($$)
-{
- my ($rd, $align) = @_;
- die "bad alignment!" if ($align < 2);
-
- if ($is_aarch64) {
- # and rd, rd, ~(align - 1) ; A64 BIC imm is an alias for AND
-
- # Unfortunately we need to calculate the immr/imms/N values to get
- # our desired immediate value. In this case we want to use an element
- # size of 64, which means that N is 1, immS is the length of run of
- # set bits in the mask, and immR is the rotation.
- # N = 1, immR = 64 - ctz, imms = 63 - ctz
- # (Note that an all bits-set mask is not encodable here, but
- # the requirement for $align to be at least 2 avoids that.)
- my $cnt = ctz($align);
- insn32(0x92000000 | 1 << 22 | (64 - $cnt) << 16 | (63 - $cnt) << 10 | $rd << 5 | $rd);
- } else {
- # bic rd, rd, (align - 1)
- insn32(0xe3c00000 | $rd << 16 | $rd << 12 | ($align - 1));
- }
-}
-
-# jump ahead of n bytes starting from next instruction
-sub write_jump_fwd($)
-{
- my ($len) = @_;
-
- if ($is_aarch64) {
- # b pc + len
- insn32(0x14000000 | (($len / 4) + 1));
- } else {
- # b pc + len
- insn32(0xea000000 | (($len / 4) - 1));
- }
-}
-
sub write_memblock_setup()
{
# Write code which sets up the memory block for loads and stores.
@@ -678,36 +466,35 @@ sub write_memblock_setup()
# of random data, aligned to the maximum desired alignment.
write_switch_to_arm();
- my $align = $MAXALIGN;
- my $datalen = 8192 + $align;
- if (($align > 255) || !is_pow_of_2($align) || $align < 4) {
- die "bad alignment!";
+ printf "\tadr\t%s, 2f\n", xr(0);
+ if ($is_aarch64) {
+ printf "\t.data\n";
+ } else {
+ printf "\tb\t3f\n";
}
- # set r0 to (datablock + (align-1)) & ~(align-1)
- # datablock is at PC + (4 * 4 instructions) = PC + 16
- write_pc_adr(0, (4 * 4) + ($align - 1)); # insn 1
- write_align_reg(0, $align); # insn 2
- write_risuop($OP_SETMEMBLOCK); # insn 3
- write_jump_fwd($datalen); # insn 4
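+ # The block itself: $MEMBLOCKLEN bytes of random data at local
+ # label 2, aligned to $MAXALIGN (in .data for A64, inline in the
+ # text for A32).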
+ printf "\t.balign\t%d\n", $MAXALIGN;
+ printf "2:\n";
- for (my $i = 0; $i < $datalen / 4; $i++) {
- insn32(rand(0xffffffff));
+ for (my $i = 0; $i < $MEMBLOCKLEN; $i += 4) {
+ write_data32(rand(0xffffffff));
}
- # next:
+ if ($is_aarch64) {
+ printf "\t.text\n";
+ } else {
+ printf "3:\n";
+ }
+
+ write_risuop($OP_SETMEMBLOCK);
}
sub write_set_fpscr_arm($)
{
my ($fpscr) = @_;
write_switch_to_arm();
- # movw r0, imm16
- insn32(0xe3000000 | ($fpscr & 0xfff) | (($fpscr & 0xf000) << 4));
- # movt r0, imm16
- insn32(0xe3400000 | (($fpscr & 0xf0000000) >> 12) | (($fpscr & 0x0fff0000) >> 16));
- # vmsr fpscr, r0
- insn32(0xeee10a10);
+ write_mov_ri(0, $fpscr);
+ printf "\tvmsr\tfpscr, r0\n";
}
sub write_set_fpscr_aarch64($)
@@ -715,10 +502,9 @@ sub write_set_fpscr_aarch64($)
# on aarch64 we have split fpcr and fpsr registers.
# Status will be initialized to 0, while user param controls fpcr.
my ($fpcr) = @_;
- write_mov_ri(0, 0);
- insn32(0xd51b4420); # msr fpsr, x0
+ printf "\tmsr\tfpsr, xzr\n";
write_mov_ri(0, $fpcr);
- insn32(0xd51b4400); # msr fpcr, x0
+ printf "\tmsr\tfpcr, x0\n";
}
sub write_set_fpscr($)
@@ -752,17 +538,12 @@ sub align($)
$alignment_restriction = $a;
}
-# XXX claudio: this seems to get the full address, not the offset.
-sub write_get_offset()
+sub get_offset()
{
- # Emit code to get a random offset within the memory block, of the
- # right alignment, into r0
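+ # Return a random offset into the memory block, honouring the
+ # current alignment restriction.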
# We require the offset to not be within 256 bytes of either
# end, to (more than) allow for the worst case data transfer, which is
# 16 * 64 bit regs
- my $offset = (rand(2048 - 512) + 256) & ~($alignment_restriction - 1);
- write_mov_ri(0, $offset);
- write_risuop($OP_GETMEMBLOCK);
+ return (rand($MEMBLOCKLEN - 512) + 256) & ~($alignment_restriction - 1);
}
# Return the log2 of the memory size of an operation described by dtype.
@@ -774,15 +555,21 @@ sub dtype_msz($)
return $dtl >= $dth ? $dth : 3 - $dth;
}
-sub reg($@)
+sub reg_plus_imm($$@)
{
- my ($base, @trashed) = @_;
- write_get_offset();
- # Now r0 is the address we want to do the access to,
- # so just move it into the basereg
- if ($base != 0) {
- write_mov_rr($base, 0);
- write_mov_ri(0, 0);
+ # Handle reg + immediate addressing mode
+ my ($base, $imm, @trashed) = @_;
+ my $offset = get_offset() - $imm;
+
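+ # On AArch64 the base register can address the data block (label 2)
+ # directly via ADR; otherwise go through the OP_GETMEMBLOCK risuop.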
+ if ($is_aarch64) {
+ printf "\tadr\tx%d, 2b%+d\n", $base, $offset;
+ } else {
+ write_mov_ri(0, $offset);
+ write_risuop($OP_GETMEMBLOCK);
+ if ($base != 0) {
+ write_mov_rr($base, 0);
+ write_mov_ri(0, 0);
+ }
}
if (grep $_ == $base, @trashed) {
return -1;
@@ -790,64 +577,37 @@ sub reg($@)
return $base;
}
-sub reg_plus_imm($$@)
+sub reg($@)
{
- # Handle reg + immediate addressing mode
- my ($base, $imm, @trashed) = @_;
- if ($imm == 0) {
- return reg($base, @trashed);
- }
-
- write_get_offset();
- # Now r0 is the address we want to do the access to,
- # so set the basereg by doing the inverse of the
- # addressing mode calculation, ie base = r0 - imm
- # We could do this more cleverly with a sub immediate.
- if ($base != 0) {
- write_mov_ri($base, $imm);
- write_sub_rrr($base, 0, $base);
- # Clear r0 to avoid register compare mismatches
- # when the memory block location differs between machines.
- write_mov_ri(0, 0);
- } else {
- # We borrow r1 as a temporary (not a problem
- # as long as we don't leave anything in a register
- # which depends on the location of the memory block)
- write_mov_ri(1, $imm);
- write_sub_rrr($base, 0, 1);
- }
- if (grep $_ == $base, @trashed) {
- return -1;
- }
- return $base;
+ # Handle reg addressing mode
+ my ($base, @trashed) = @_;
+ return reg_plus_imm($base, 0, @trashed);
}
sub reg_plus_imm_pl($$@)
{
# Handle reg + immediate addressing mode
my ($base, $imm, @trashed) = @_;
- if ($imm == 0) {
- return reg($base, @trashed);
- }
- write_get_offset();
+ my $offset = get_offset();
- # Now r0 is the address we want to do the access to,
- # so set the basereg by doing the inverse of the
+ printf "\tadr\tx%d, 2b%+d\n", $base, $offset;
+
+ # Set the basereg by doing the inverse of the
# addressing mode calculation, ie base = r0 - imm
#
# Note that addpl has a 6-bit immediate, but ldr has a 9-bit
# immediate, so we need to be able to support larger immediates.
-
if (-$imm >= -32 && -$imm <= 31) {
- write_addpl_rri($base, 0, -$imm);
+ write_addpl_rri($base, $base, -$imm);
} else {
- # We borrow r1 and r2 as a temporaries (not a problem
- # as long as we don't leave anything in a register
- # which depends on the location of the memory block)
- write_mov_ri(1, 0);
- write_mov_ri(2, $imm);
- write_addpl_rri(1, 1, 1);
- write_msub_rrrr($base, 1, 2, 0);
+ # Select two temporaries (no need to zero them afterward, since
+ # we don't leave anything which depends on the location of the
+ # memory block).
+ my $t1 = $base == 0 ? 1 : 0;
+ my $t2 = $base <= 1 ? 2 : 1;
+ write_mov_ri($t1, 0);
+ write_addpl_rri($t1, $t1, 1);
+ write_mov_ri($t2, -$imm);
+ write_madd_rrrr($base, $t1, $t2, $base);
}
if (grep $_ == $base, @trashed) {
return -1;
@@ -855,7 +615,7 @@ sub reg_plus_imm_pl($$@)
return $base;
}
-sub reg_plus_imm_vl($$@)
+sub reg_plus_imm_vl($$$@)
{
# The usual address formulation is
# elements = VL DIV esize
@@ -865,15 +625,13 @@ sub reg_plus_imm_vl($$@)
# scale = log2(esize / msize)
# base + (imm * VL) >> scale
my ($base, $imm, $scale, @trashed) = @_;
- if ($imm == 0) {
- return reg($base, @trashed);
- }
- write_get_offset();
+ my $offset = get_offset();
+ my $t1 = $base == 0 ? 1 : 0;
+ my $t2 = $base <= 1 ? 2 : 1;
- # Now r0 is the address we want to do the access to,
- # so set the basereg by doing the inverse of the
- # addressing mode calculation, ie base = r0 - imm
- #
+ printf "\tadr\tx%d, 2b%+d\n", $base, $offset;
+
+ # Set the basereg by doing the inverse of the addressing calculation.
# Note that rdvl/addvl have a 6-bit immediate, but ldr has a 9-bit
# immediate, so we need to be able to support larger immediates.
@@ -882,18 +640,19 @@ sub reg_plus_imm_vl($$@)
my $imm_div = $imm / $mul;
if ($imm == $imm_div * $mul && -$imm_div >= -32 && -$imm_div <= 31) {
- write_addvl_rri($base, 0, -$imm_div);
+ write_addvl_rri($base, $base, -$imm_div);
} elsif ($imm >= -32 && $imm <= 31) {
- write_rdvl_ri(1, $imm);
- write_sub_rrrs($base, 0, 1, $SHIFT_ASR, $scale);
+ write_rdvl_ri($t1, $imm);
+ write_sub_rrrs($base, $base, $t1, $SHIFT_ASR, $scale);
} else {
- write_rdvl_ri(1, 1);
- write_mov_ri(2, $imm);
+ write_rdvl_ri($t1, 1);
if ($scale == 0) {
- write_msub_rrrr($base, 1, 2, 0);
+ write_mov_ri($t2, -$imm);
+ write_madd_rrrr($base, $t1, $t2, $base);
} else {
- write_mul_rrr(1, 1, 2);
- write_sub_rrrs($base, 0, 1, $SHIFT_ASR, $scale);
+ write_mov_ri($t2, $imm);
+ write_mul_rrr($t1, $t1, $t2);
+ write_sub_rrrs($base, $base, $t1, $SHIFT_ASR, $scale);
}
}
if (grep $_ == $base, @trashed) {
@@ -912,35 +671,39 @@ sub reg_plus_reg_shifted($$$@)
{
# handle reg + reg LSL imm addressing mode
my ($base, $idx, $shift, @trashed) = @_;
- if ($shift < 0 || $shift > 4 || (!$is_aarch64 && $shift == 4)) {
+ my $offset = get_offset();
+ if ($shift < 0 || $shift > 4 || (!$is_aarch64 && $shift == 4)) {
print ("\n(shift) $shift\n");
print ("\n(arch) $is_aarch64\n");
die "reg_plus_reg_shifted: bad shift size\n";
}
- my $savedidx = 0;
- if ($idx == 0) {
- # save the index into some other register for the
- # moment, because the risuop will trash r0
- $idx = 1;
- $idx++ if $idx == $base;
- $savedidx = 1;
- write_mov_rr($idx, 0);
- }
- # Get a random offset within the memory block, of the
- # right alignment.
- write_get_offset();
- # Now r0 is the address we want to do the access to,
- # so set the basereg by doing the inverse of the
- # addressing mode calculation, ie base = r0 - idx LSL imm
- # LSL x is shift type 0,
- write_sub_rrrs($base, 0, $idx, $SHIFT_LSL, $shift);
- if ($savedidx) {
- # We can move this back to r0 now
- write_mov_rr(0, $idx);
- } elsif ($base != 0) {
- write_mov_ri(0, 0);
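+ # On AArch64, point the base register at the block with ADR and
+ # subtract off the shifted index; A32/T32 still goes through r0
+ # and the OP_GETMEMBLOCK risuop.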
+ if ($is_aarch64) {
+ printf "\tadr\tx%d, 2b%+d\n", $base, $offset;
+ write_sub_rrrs($base, $base, $idx, $SHIFT_LSL, $shift);
+ } else {
+ my $savedidx = 0;
+
+ if ($idx == 0) {
+ # save the index into some other register for the
+ # moment, because the risuop will trash r0
+ $idx = 1;
+ $idx++ if $idx == $base;
+ $savedidx = 1;
+ write_mov_rr($idx, 0);
+ }
+
+ write_mov_ri(0, $offset);
+ write_risuop($OP_GETMEMBLOCK);
+ write_sub_rrrs($base, 0, $idx, $SHIFT_LSL, $shift);
+
+ if ($savedidx) {
+ # We can move idx back to r0 now
+ write_mov_rr(0, $idx);
+ } elsif ($base != 0) {
+ write_mov_ri(0, 0);
+ }
}
if (grep $_ == $base, @trashed) {
return -1;
@@ -1028,21 +791,18 @@ sub gen_one_insn($$)
}
if ($is_thumb) {
- # Since the encoding diagrams in the ARM ARM give 32 bit
- # Thumb instructions as low half | high half, we
- # flip the halves here so that the input format in
- # the config file can be in the same order as the ARM.
- # For a 16 bit Thumb instruction the generated insn is in
- # the high halfword (because we didn't bother to readjust
- # all the bit positions in parse_config_file() when we
- # got to the end and found we only had 16 bits).
- insn16($insn >> 16);
if ($insnwidth == 32) {
- insn16($insn & 0xffff);
+ printf "\t.inst.w\t%#08x\n", $insn;
+ } else {
+ # For a 16 bit Thumb instruction the generated insn is in
+ # the high halfword (because we didn't bother to readjust
+ # all the bit positions in parse_config_file() when we
+ # got to the end and found we only had 16 bits).
+ printf "\t.inst.n\t%#04x\n", $insn >> 16;
}
} else {
# ARM is simple, always a 32 bit word
- insn32($insn);
+ printf "\t.inst\t%#08x\n", $insn;
}
if (defined $memblock) {
@@ -1058,8 +818,12 @@ sub gen_one_insn($$)
}
if ($basereg != -1) {
- write_mov_ri(0, 0);
- write_risuop($OP_GETMEMBLOCK);
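+ # Recover the memory block base address in r0/x0 so the
+ # addressing mode calculation can be undone below.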
+ if ($is_aarch64) {
+ printf "\tadr\tx0, 2b\n";
+ } else {
+ write_mov_ri(0, 0);
+ write_risuop($OP_GETMEMBLOCK);
+ }
write_sub_rrr($basereg, $basereg, 0);
write_mov_ri(0, 0);
}
@@ -1097,7 +861,15 @@ sub write_test_code($$$$$$$$)
my %insn_details = %{ $params->{ 'details' } };
my @keys = @{ $params->{ 'keys' } };
- open_bin($outfile);
+ open_asm($outfile);
+
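+ # Assembler preamble: for A32/T32, select unified syntax and a
+ # baseline architecture/FPU so the generated instructions assemble.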
+ printf "\t.text\n";
+ if (!$is_aarch64) {
+ printf "\t.syntax unified\n";
+ printf "\t.arm\n";
+ printf "\t.arch armv7-a\n";
+ printf "\t.fpu neon\n" if ($fp_enabled);
+ }
# convert from probability that insn will be conditional to
# probability of forcing insn to unconditional
@@ -1106,7 +878,7 @@ sub write_test_code($$$$$$$$)
# TODO better random number generator?
srand(0);
- print "Generating code using patterns: @keys...\n";
+ print STDOUT "Generating code using patterns: @keys...\n";
progress_start(78, $numinsns);
if ($fp_enabled) {
@@ -1128,7 +900,7 @@ sub write_test_code($$$$$$$$)
write_risuop($OP_COMPARE);
# Rewrite the registers periodically. This avoids the tendency
# for the VFP registers to decay to NaNs and zeroes.
- if ($periodic_reg_random && ($i % 100) == 0) {
+ if (($i % 100) == 0) {
write_random_register_data($fp_enabled, $sve_enabled);
write_switch_to_test_mode();
}
@@ -1136,7 +908,10 @@ sub write_test_code($$$$$$$$)
}
write_risuop($OP_TESTEND);
progress_end();
- close_bin();
+
+ close_asm();
+ assemble_and_link($outfile, $params->{ 'cross_prefix' },
+ $params->{ 'keep' });
}
1;
Split random data and memory blocks into .data. Use ADR label+offset
to address them. Fix some bugs in the (apparently unused) SVE memory
addressing.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 risugen_arm.pm | 719 +++++++++++++++++--------------------------------
 1 file changed, 247 insertions(+), 472 deletions(-)