summaryrefslogtreecommitdiffstats
path: root/target/arm/translate-vfp.c.inc
diff options
context:
space:
mode:
authorPeter Maydell2020-08-28 20:33:15 +0200
committerPeter Maydell2020-09-01 12:19:32 +0200
commit9886fe2834b064a3cf0675a4659942ed547aed42 (patch)
tree3b3efa0b4fa2f302541c42cc7d123ec4a11e6d00 /target/arm/translate-vfp.c.inc
parenttarget/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS (diff)
downloadqemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.gz
qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.xz
qemu-9886fe2834b064a3cf0675a4659942ed547aed42.zip
target/arm: Implement VFP fp16 for fused-multiply-add
Implement VFP fp16 support for fused multiply-add insns VFNMA, VFNMS, VFMA, VFMS.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-7-peter.maydell@linaro.org
Diffstat (limited to 'target/arm/translate-vfp.c.inc')
-rw-r--r--target/arm/translate-vfp.c.inc64
1 files changed, 64 insertions, 0 deletions
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 9937fa569e..b5eb9d66b3 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1913,6 +1913,69 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
a->vd, a->vn, a->vm, false);
}
+static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
+{
+ /*
+ * Generate code for one fp16 fused multiply-add, honouring neg_n/neg_d:
+ *   VFNMA (neg_d)        : fd = muladd(-fd,  fn, fm)
+ *   VFNMS (neg_n, neg_d) : fd = muladd(-fd, -fn, fm)
+ *   VFMA                 : fd = muladd( fd,  fn, fm)
+ *   VFMS  (neg_n)        : fd = muladd( fd, -fn, fm)
+ * Returns false if a feature or vec_len/vec_stride check fails; otherwise
+ * true, including when vfp_access_check() fails and no arithmetic is emitted.
+ * The fused op must be a single floating point operation with no rounding
+ * between the multiply and the add. Doing the negations as separate steps
+ * is correct: a negated NaN input should come out with its sign bit flipped.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only, and only with the FP16 extension. The fpsp_v2
+ * check is needed as well as SIMDFMAC because in a Neon-no-VFP core the
+ * SIMDFMAC ID register field will be non-zero.
+ */
+ if (!dc_isar_feature(aa32_fp16_arith, s) ||
+ !dc_isar_feature(aa32_simdfmac, s) ||
+ !dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ neon_load_reg32(vn, a->vn);
+ neon_load_reg32(vm, a->vm);
+ if (neg_n) {
+ /* VFNMS, VFMS: negate the fn operand before the fused op */
+ gen_helper_vfp_negh(vn, vn);
+ }
+ neon_load_reg32(vd, a->vd);
+ if (neg_d) {
+ /* VFNMA, VFNMS: negate the fd operand before the fused op */
+ gen_helper_vfp_negh(vd, vd);
+ }
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
+ neon_store_reg32(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(vn);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_i32(vd);
+
+ return true;
+}
+
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
/*
@@ -2062,6 +2125,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
+MAKE_VFM_TRANS_FNS(hp) /* fp16 variants; presumably wrap do_vfm_hp -- confirm */
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)