target/arm: Implement VFP fp16 for fused-multiply-add

Implement VFP fp16 support for fused multiply-add insns VFNMA, VFNMS, VFMA, VFMS. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200828183354.27913-7-peter.maydell@linaro.org
author: Peter Maydell 2020-08-28 20:33:15 +0200
committer: Peter Maydell 2020-09-01 12:19:32 +0200
commit: 9886fe2834b064a3cf0675a4659942ed547aed42 (patch)
tree: 3b3efa0b4fa2f302541c42cc7d123ec4a11e6d00 /target/arm/translate-vfp.c.inc
parent: target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS (diff)
download: qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.gz
qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.xz
qemu-9886fe2834b064a3cf0675a4659942ed547aed42.zip
1 files changed, 64 insertions, 0 deletions
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 9937fa569e..b5eb9d66b3 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1913,6 +1913,69 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
                          a->vd, a->vn, a->vm, false);
 }
 
+static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
+{
+    /*
+     * VFNMA : fd = muladd(-fd,  fn, fm)
+     * VFNMS : fd = muladd(-fd, -fn, fm)
+     * VFMA  : fd = muladd( fd,  fn, fm)
+     * VFMS  : fd = muladd( fd, -fn, fm)
+     *
+     * These are fused multiply-add, and must be done as one floating
+     * point operation with no rounding between the multiplication and
+     * addition steps.  NB that doing the negations here as separate
+     * steps is correct : an input NaN should come out with its sign
+     * bit flipped if it is a negated-input.
+     */
+    TCGv_ptr fpst;
+    TCGv_i32 vn, vm, vd;
+
+    /*
+     * Present in VFPv4 only, and only with the FP16 extension.
+     * Note that we can't rely on the SIMDFMAC check alone, because
+     * in a Neon-no-VFP core that ID register field will be non-zero.
+     */
+    if (!dc_isar_feature(aa32_fp16_arith, s) ||
+        !dc_isar_feature(aa32_simdfmac, s) ||
+        !dc_isar_feature(aa32_fpsp_v2, s)) {
+        return false;
+    }
+
+    if (s->vec_len != 0 || s->vec_stride != 0) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    vn = tcg_temp_new_i32();
+    vm = tcg_temp_new_i32();
+    vd = tcg_temp_new_i32();
+
+    neon_load_reg32(vn, a->vn);
+    neon_load_reg32(vm, a->vm);
+    if (neg_n) {
+        /* VFNMS, VFMS */
+        gen_helper_vfp_negh(vn, vn);
+    }
+    neon_load_reg32(vd, a->vd);
+    if (neg_d) {
+        /* VFNMA, VFNMS */
+        gen_helper_vfp_negh(vd, vd);
+    }
+    fpst = fpstatus_ptr(FPST_FPCR_F16);
+    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
+    neon_store_reg32(vd, a->vd);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(vn);
+    tcg_temp_free_i32(vm);
+    tcg_temp_free_i32(vd);
+
+    return true;
+}
+
 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
 {
     /*
@@ -2062,6 +2125,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
 
+MAKE_VFM_TRANS_FNS(hp)
 MAKE_VFM_TRANS_FNS(sp)
 MAKE_VFM_TRANS_FNS(dp)
author	Peter Maydell	2020-08-28 20:33:15 +0200
committer	Peter Maydell	2020-09-01 12:19:32 +0200
commit	9886fe2834b064a3cf0675a4659942ed547aed42 (patch)
tree	3b3efa0b4fa2f302541c42cc7d123ec4a11e6d00 /target/arm/translate-vfp.c.inc
parent	target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS (diff)
download	qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.gz qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.xz qemu-9886fe2834b064a3cf0675a4659942ed547aed42.zip