diff options
author | Peter Maydell | 2020-08-28 20:33:15 +0200 |
---|---|---|
committer | Peter Maydell | 2020-09-01 12:19:32 +0200 |
commit | 9886fe2834b064a3cf0675a4659942ed547aed42 (patch) | |
tree | 3b3efa0b4fa2f302541c42cc7d123ec4a11e6d00 /target/arm/translate-vfp.c.inc | |
parent | target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS (diff) | |
download | qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.gz qemu-9886fe2834b064a3cf0675a4659942ed547aed42.tar.xz qemu-9886fe2834b064a3cf0675a4659942ed547aed42.zip |
target/arm: Implement VFP fp16 for fused-multiply-add
Implement VFP fp16 support for fused multiply-add insns
VFNMA, VFNMS, VFMA, VFMS.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-7-peter.maydell@linaro.org
Diffstat (limited to 'target/arm/translate-vfp.c.inc')
-rw-r--r-- | target/arm/translate-vfp.c.inc | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 9937fa569e..b5eb9d66b3 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -1913,6 +1913,69 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) a->vd, a->vn, a->vm, false); } +static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) +{ + /* + * VFNMA : fd = muladd(-fd, fn, fm) + * VFNMS : fd = muladd(-fd, -fn, fm) + * VFMA : fd = muladd( fd, fn, fm) + * VFMS : fd = muladd( fd, -fn, fm) + * + * These are fused multiply-add, and must be done as one floating + * point operation with no rounding between the multiplication and + * addition steps. NB that doing the negations here as separate + * steps is correct : an input NaN should come out with its sign + * bit flipped if it is a negated-input. + */ + TCGv_ptr fpst; + TCGv_i32 vn, vm, vd; + + /* + * Present in VFPv4 only, and only with the FP16 extension. + * Note that we can't rely on the SIMDFMAC check alone, because + * in a Neon-no-VFP core that ID register field will be non-zero. + */ + if (!dc_isar_feature(aa32_fp16_arith, s) || + !dc_isar_feature(aa32_simdfmac, s) || + !dc_isar_feature(aa32_fpsp_v2, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vn = tcg_temp_new_i32(); + vm = tcg_temp_new_i32(); + vd = tcg_temp_new_i32(); + + neon_load_reg32(vn, a->vn); + neon_load_reg32(vm, a->vm); + if (neg_n) { + /* VFNMS, VFMS */ + gen_helper_vfp_negh(vn, vn); + } + neon_load_reg32(vd, a->vd); + if (neg_d) { + /* VFNMA, VFNMS */ + gen_helper_vfp_negh(vd, vd); + } + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); + neon_store_reg32(vd, a->vd); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(vn); + tcg_temp_free_i32(vm); + tcg_temp_free_i32(vd); + + return true; +} + static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) { /* @@ -2062,6 +2125,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) +MAKE_VFM_TRANS_FNS(hp) MAKE_VFM_TRANS_FNS(sp) MAKE_VFM_TRANS_FNS(dp) |