target/ppc: Moved VMLADDUHM to decodetree and use gvec

This patch moves VMLADDUHM to decodetree a creates a gvec implementation using mul_vec and add_vec. rept loop master patch 8 12500 0,01810500 0,00903100 (-50.1%) 25 4000 0,01739400 0,00747700 (-57.0%) 100 1000 0,01843600 0,00901400 (-51.1%) 500 200 0,02574600 0,01971000 (-23.4%) 2500 40 0,05921600 0,07121800 (+20.3%) 8000 12 0,15326700 0,21725200 (+41.7%) The significant difference in performance when REPT is low and LOOP is high I think is due to the fact that the new implementation has a higher translation time, as when using a helper only 5 TCGop are used but with the patch a total of 10 TCGop are needed (Power lacks a direct mul_vec equivalent so this instruction is implemented with the help of 5 others, vmuleu, vmulou, vmrgh, vmrgl and vpkum). Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20221019125040.48028-2-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
author: Lucas Mateus Castro (alqotel) 2022-10-19 14:50:29 +0200
committer: Daniel Henrique Barboza 2022-10-28 18:15:22 +0200
commit: dc46167a2225d3c0861cd4fcbc350e3e0e89bc61 (patch)
tree: 863ec5f60b2a8874a1d76fd5508040d810f0ab62 /target/ppc/translate
parent: target/ppc: move msgsync to decodetree (diff)
download: qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.tar.gz
qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.tar.xz
qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.zip
1 files changed, 30 insertions, 18 deletions
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index e644ad3236..9f18c6d4f2 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -2523,24 +2523,6 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
 
 GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
 
-static void gen_vmladduhm(DisasContext *ctx)
-{
-    TCGv_ptr ra, rb, rc, rd;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    ra = gen_avr_ptr(rA(ctx->opcode));
-    rb = gen_avr_ptr(rB(ctx->opcode));
-    rc = gen_avr_ptr(rC(ctx->opcode));
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_vmladduhm(rd, ra, rb, rc);
-    tcg_temp_free_ptr(ra);
-    tcg_temp_free_ptr(rb);
-    tcg_temp_free_ptr(rc);
-    tcg_temp_free_ptr(rd);
-}
-
 static bool do_va_helper(DisasContext *ctx, arg_VA *a,
     void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
 {
@@ -2569,6 +2551,36 @@ TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ)
 TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
 TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
 
+static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+                              TCGv_vec c)
+{
+    tcg_gen_mul_vec(vece, t, a, b);
+    tcg_gen_add_vec(vece, t, t, c);
+}
+
+static bool trans_VMLADDUHM(DisasContext *ctx, arg_VA *a)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_add_vec, INDEX_op_mul_vec, 0
+    };
+
+    static const GVecGen4 op = {
+        .fno = gen_helper_VMLADDUHM,
+        .fniv = gen_vmladduhm_vec,
+        .opt_opc = vecop_list,
+        .vece = MO_16
+    };
+
+    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
+    REQUIRE_VECTOR(ctx);
+
+    tcg_gen_gvec_4(avr_full_offset(a->vrt), avr_full_offset(a->vra),
+                   avr_full_offset(a->vrb), avr_full_offset(a->rc),
+                   16, 16, &op);
+
+    return true;
+}
+
 static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
 {
     REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
author	Lucas Mateus Castro (alqotel)	2022-10-19 14:50:29 +0200
committer	Daniel Henrique Barboza	2022-10-28 18:15:22 +0200
commit	dc46167a2225d3c0861cd4fcbc350e3e0e89bc61 (patch)
tree	863ec5f60b2a8874a1d76fd5508040d810f0ab62 /target/ppc/translate
parent	target/ppc: move msgsync to decodetree (diff)
download	qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.tar.gz qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.tar.xz qemu-dc46167a2225d3c0861cd4fcbc350e3e0e89bc61.zip