summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Maydell, 2020-05-12 18:39:04 +0200
committer: Peter Maydell, 2020-05-14 16:03:09 +0200
commit: e95485f85657be21135c17a9226e297c21e73360 (patch)
tree: 5ff4380795ba22e0687309748584014e9340e773
parent: target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree (diff)
download: qemu-e95485f85657be21135c17a9226e297c21e73360.tar.gz
qemu-e95485f85657be21135c17a9226e297c21e73360.tar.xz
qemu-e95485f85657be21135c17a9226e297c21e73360.zip
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree
Convert the Neon floating point VFMA and VFMS insns to decodetree.
These are the last insns in the 3-reg-same group so we can remove
all the support/loop code from the old decoder.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200512163904.10918-18-peter.maydell@linaro.org
-rw-r--r-- target/arm/neon-dp.decode | 3
-rw-r--r-- target/arm/translate-neon.inc.c | 41
-rw-r--r-- target/arm/translate.c | 176
3 files changed, 46 insertions, 174 deletions
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 9c28886507..8beb1db768 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -174,6 +174,9 @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
+VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
+
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 5e9e53c5c3..3fe65a0b08 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1207,6 +1207,47 @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
return do_3same(s, a, gen_VRSQRTS_fp_3s);
}
+static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
+}
+
+static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
+}
+
+static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_negs(vn, vn);
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
+}
+
+static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
+}
+
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
/* FP operations handled pairwise 32 bits at a time */
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c1d4fab8e8..4c9bb8b5ac 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3391,78 +3391,6 @@ static void gen_neon_narrow_op(int op, int u, int size,
}
}
-/* Symbolic constants for op fields for Neon 3-register same-length.
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
- * table A7-9.
- */
-#define NEON_3R_VHADD 0
-#define NEON_3R_VQADD 1
-#define NEON_3R_VRHADD 2
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
-#define NEON_3R_VHSUB 4
-#define NEON_3R_VQSUB 5
-#define NEON_3R_VCGT 6
-#define NEON_3R_VCGE 7
-#define NEON_3R_VSHL 8
-#define NEON_3R_VQSHL 9
-#define NEON_3R_VRSHL 10
-#define NEON_3R_VQRSHL 11
-#define NEON_3R_VMAX 12
-#define NEON_3R_VMIN 13
-#define NEON_3R_VABD 14
-#define NEON_3R_VABA 15
-#define NEON_3R_VADD_VSUB 16
-#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLS */
-#define NEON_3R_VMUL 19
-#define NEON_3R_VPMAX 20
-#define NEON_3R_VPMIN 21
-#define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD_VQRDMLAH 23
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
-
-static const uint8_t neon_3r_sizes[] = {
- [NEON_3R_VHADD] = 0x7,
- [NEON_3R_VQADD] = 0xf,
- [NEON_3R_VRHADD] = 0x7,
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
- [NEON_3R_VHSUB] = 0x7,
- [NEON_3R_VQSUB] = 0xf,
- [NEON_3R_VCGT] = 0x7,
- [NEON_3R_VCGE] = 0x7,
- [NEON_3R_VSHL] = 0xf,
- [NEON_3R_VQSHL] = 0xf,
- [NEON_3R_VRSHL] = 0xf,
- [NEON_3R_VQRSHL] = 0xf,
- [NEON_3R_VMAX] = 0x7,
- [NEON_3R_VMIN] = 0x7,
- [NEON_3R_VABD] = 0x7,
- [NEON_3R_VABA] = 0x7,
- [NEON_3R_VADD_VSUB] = 0xf,
- [NEON_3R_VTST_VCEQ] = 0x7,
- [NEON_3R_VML] = 0x7,
- [NEON_3R_VMUL] = 0x7,
- [NEON_3R_VPMAX] = 0x7,
- [NEON_3R_VPMIN] = 0x7,
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
-};
-
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
* table A7-13.
@@ -5383,108 +5311,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
rm_ofs = neon_reg_offset(rm, 0);
if ((insn & (1 << 23)) == 0) {
- /* Three register same length. */
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- /* Catch invalid op and bad size combinations: UNDEF */
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- /* All insns of this form UNDEF for either this condition or the
- * superset of cases "Q==1"; we catch the latter later.
- */
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_3R_VFM_VQRDMLSH:
- if (!u) {
- /* VFM, VFMS */
- if (size == 1) {
- return 1;
- }
- break;
- }
- /* VQRDMLSH : handled by decodetree */
- return 1;
-
- case NEON_3R_VADD_VSUB:
- case NEON_3R_LOGIC:
- case NEON_3R_VMAX:
- case NEON_3R_VMIN:
- case NEON_3R_VTST_VCEQ:
- case NEON_3R_VCGT:
- case NEON_3R_VCGE:
- case NEON_3R_VQADD:
- case NEON_3R_VQSUB:
- case NEON_3R_VMUL:
- case NEON_3R_VML:
- case NEON_3R_VSHL:
- case NEON_3R_SHA:
- case NEON_3R_VHADD:
- case NEON_3R_VRHADD:
- case NEON_3R_VHSUB:
- case NEON_3R_VABD:
- case NEON_3R_VABA:
- case NEON_3R_VQSHL:
- case NEON_3R_VRSHL:
- case NEON_3R_VQRSHL:
- case NEON_3R_VPMAX:
- case NEON_3R_VPMIN:
- case NEON_3R_VPADD_VQRDMLAH:
- case NEON_3R_VQDMULH_VQRDMULH:
- case NEON_3R_FLOAT_ARITH:
- case NEON_3R_FLOAT_MULTIPLY:
- case NEON_3R_FLOAT_CMP:
- case NEON_3R_FLOAT_ACMP:
- case NEON_3R_FLOAT_MINMAX:
- case NEON_3R_FLOAT_MISC:
- /* Already handled by decodetree */
- return 1;
- }
-
- if (size == 3) {
- /* 64-bit element instructions: handled by decodetree */
- return 1;
- }
- switch (op) {
- case NEON_3R_VFM_VQRDMLSH:
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return 1;
- }
- break;
- default:
- break;
- }
-
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
- /* Elementwise. */
- tmp = neon_load_reg(rn, pass);
- tmp2 = neon_load_reg(rm, pass);
- switch (op) {
- case NEON_3R_VFM_VQRDMLSH:
- {
- /* VFMA, VFMS: fused multiply-add */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
- if (size) {
- /* VFMS */
- gen_helper_vfp_negs(tmp, tmp);
- }
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
-
- neon_store_reg(rd, pass, tmp);
-
- } /* for pass */
- /* End of 3 register same size operations. */
+ /* Three register same length: handled by decodetree */
+ return 1;
} else if (insn & (1 << 4)) {
if ((insn & 0x00380080) != 0) {
/* Two registers and shift. */