@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -83,6 +83,16 @@ static inline bool is_preloaded(DisasContext *ctx, int num)
return test_bit(num, ctx->regs_written);
}
+static inline bool is_tmp_vreg_preloaded(DisasContext *ctx, int num)
+{
+ return test_bit(num, ctx->vregs_updated_tmp);
+}
+
+static inline bool is_future_vreg_preloaded(DisasContext *ctx, int num)
+{
+ return test_bit(num, ctx->vregs_select);
+}
+
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
@@ -541,6 +541,75 @@ static void test_vshuff(void)
check_output_b(__LINE__, 1);
}
+static void test_load_tmp_predicated(void)
+{
+ void *p0 = buffer0;
+ void *p1 = buffer1;
+ void *pout = output;
+ bool pred = true;
+
+ for (int i = 0; i < BUFSIZE; i++) {
+ /*
+ * Load into v12 as .tmp with a predicate
+ * When the predicate is true, we get the vector from buffer1[i]
+ * When the predicate is false, we get a vector of all 1's
+ * Regardless of the predicate, the next packet should have
+ * a vector of all 1's
+ */
+ asm("v3 = vmem(%0 + #0)\n\t"
+ "r1 = #1\n\t"
+ "v12 = vsplat(r1)\n\t"
+ "p1 = !cmp.eq(%3, #0)\n\t"
+ "{\n\t"
+ " if (p1) v12.tmp = vmem(%1 + #0)\n\t"
+ " v4.w = vadd(v12.w, v3.w)\n\t"
+ "}\n\t"
+ "v4.w = vadd(v4.w, v12.w)\n\t"
+ "vmem(%2 + #0) = v4\n\t"
+ : : "r"(p0), "r"(p1), "r"(pout), "r"(pred)
+ : "r1", "p1", "v12", "v3", "v4", "v6", "memory");
+ p0 += sizeof(MMVector);
+ p1 += sizeof(MMVector);
+ pout += sizeof(MMVector);
+
+ for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+ expect[i].w[j] =
+ pred ? buffer0[i].w[j] + buffer1[i].w[j] + 1
+ : buffer0[i].w[j] + 2;
+ }
+ pred = !pred;
+ }
+
+ check_output_w(__LINE__, BUFSIZE);
+}
+
+static void test_load_cur_predicated(void)
+{
+ bool pred = true;
+ for (int i = 0; i < BUFSIZE; i++) {
+ asm volatile("p0 = !cmp.eq(%3, #0)\n\t"
+ "v3 = vmem(%0+#0)\n\t"
+ /*
+ * Preload v4 to make sure that the assignment from the
+ * packet below is not being ignored when pred is false.
+ */
+ "r0 = #0x01237654\n\t"
+ "v4 = vsplat(r0)\n\t"
+ "{\n\t"
+ " if (p0) v3.cur = vmem(%1+#0)\n\t"
+ " v4 = v3\n\t"
+ "}\n\t"
+ "vmem(%2+#0) = v4\n\t"
+ :
+ : "r"(&buffer0[i]), "r"(&buffer1[i]),
+ "r"(&output[i]), "r"(pred)
+ : "r0", "p0", "v3", "v4", "memory");
+ expect[i] = pred ? buffer1[i] : buffer0[i];
+ pred = !pred;
+ }
+ check_output_w(__LINE__, BUFSIZE);
+}
+
int main()
{
init_buffers();
@@ -578,6 +647,9 @@ int main()
test_vshuff();
+ test_load_tmp_predicated();
+ test_load_cur_predicated();
+
puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
}
@@ -173,6 +173,22 @@ def genptr_decl(f, tag, regtype, regid, regno):
f.write(" ctx_future_vreg_off(ctx, %s%sN," % \
(regtype, regid))
f.write(" 1, true);\n");
+ if regid != "y" and 'A_CONDEXEC' in hex_common.attribdict[tag]:
+ if hex_common.is_tmp_result(tag):
+ preload_test_fn = "is_tmp_vreg_preloaded"
+ else:
+ preload_test_fn = "is_future_vreg_preloaded"
+ f.write(" if (!%s(ctx, %s)) {\n" % (preload_test_fn, regN))
+ f.write(" intptr_t src_off =")
+ f.write(" offsetof(CPUHexagonState, VRegs[%s%sN]);\n"% \
+ (regtype, regid))
+ f.write(" tcg_gen_gvec_mov(MO_64, %s%sV_off,\n" % \
+ (regtype, regid))
+ f.write(" src_off,\n")
+ f.write(" sizeof(MMVector),\n")
+ f.write(" sizeof(MMVector));\n")
+ f.write(" }\n")
+
if (not hex_common.skip_qemu_helper(tag)):
f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
(regtype, regid))