summaryrefslogtreecommitdiffstats
path: root/target/ppc/translate
diff options
context:
space:
mode:
authorLucas Mateus Castro (alqotel)2022-10-19 14:50:33 +0200
committerDaniel Henrique Barboza2022-10-28 18:15:22 +0200
commitd57fbd8fd9ea4f559cf4ed6c8fb6f064b73d6882 (patch)
treec6753cae39af4f88408d20bb5222779948d747dd /target/ppc/translate
parenttarget/ppc: Move VNEG[WD] to decodtree and use gvec (diff)
downloadqemu-d57fbd8fd9ea4f559cf4ed6c8fb6f064b73d6882.tar.gz
qemu-d57fbd8fd9ea4f559cf4ed6c8fb6f064b73d6882.tar.xz
qemu-d57fbd8fd9ea4f559cf4ed6c8fb6f064b73d6882.zip
target/ppc: Move VPRTYB[WDQ] to decodetree and use gvec
Moved VPRTYBW and VPRTYBD to use gvec and both of them and VPRTYBQ to decodetree. VPRTYBW and VPRTYBD now also use .fni4 and .fni8, respectively. vprtybw: rept loop master patch 8 12500 0,01198900 0,00703100 (-41.4%) 25 4000 0,01070100 0,00571400 (-46.6%) 100 1000 0,01123300 0,00678200 (-39.6%) 500 200 0,01601500 0,01535600 (-4.1%) 2500 40 0,03872900 0,05562100 (43.6%) 8000 12 0,10047000 0,16643000 (65.7%) vprtybd: rept loop master patch 8 12500 0,00757700 0,00788100 (4.0%) 25 4000 0,00652500 0,00669600 (2.6%) 100 1000 0,00714400 0,00825400 (15.5%) 500 200 0,01211000 0,01903700 (57.2%) 2500 40 0,03483800 0,07021200 (101.5%) 8000 12 0,09591800 0,21036200 (119.3%) vprtybq: rept loop master patch 8 12500 0,00675600 0,00667200 (-1.2%) 25 4000 0,00619400 0,00643200 (3.8%) 100 1000 0,00707100 0,00751100 (6.2%) 500 200 0,01199300 0,01342000 (11.9%) 2500 40 0,03490900 0,04092900 (17.2%) 8000 12 0,09588200 0,11465100 (19.6%) I wasn't expecting such a performance lost in both VPRTYBD and VPRTYBQ, I'm not sure if it's worth to move those instructions. Comparing the assembly of the helper with the TCGop they are pretty similar, so I'm not sure why vprtybd took so much more time. Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20221019125040.48028-6-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Diffstat (limited to 'target/ppc/translate')
-rw-r--r--target/ppc/translate/vmx-impl.c.inc68
-rw-r--r--target/ppc/translate/vmx-ops.c.inc3
2 files changed, 65 insertions, 6 deletions
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index b9a9e83ab3..cbb2a3ebe7 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1659,9 +1659,71 @@ GEN_VXFORM_NOA_ENV(vrfim, 5, 11);
GEN_VXFORM_NOA_ENV(vrfin, 5, 8);
GEN_VXFORM_NOA_ENV(vrfip, 5, 10);
GEN_VXFORM_NOA_ENV(vrfiz, 5, 9);
-GEN_VXFORM_NOA(vprtybw, 1, 24);
-GEN_VXFORM_NOA(vprtybd, 1, 24);
-GEN_VXFORM_NOA(vprtybq, 1, 24);
+
+static void gen_vprtyb_vec(unsigned vece, TCGv_vec t, TCGv_vec b)
+{
+ int i;
+ TCGv_vec tmp = tcg_temp_new_vec_matching(b);
+ /* MO_32 is 2, so 2 iteractions for MO_32 and 3 for MO_64 */
+ for (i = 0; i < vece; i++) {
+ tcg_gen_shri_vec(vece, tmp, b, (4 << (vece - i)));
+ tcg_gen_xor_vec(vece, b, tmp, b);
+ }
+ tcg_gen_and_vec(vece, t, b, tcg_constant_vec_matching(t, vece, 1));
+ tcg_temp_free_vec(tmp);
+}
+
+/* vprtybw */
+static void gen_vprtyb_i32(TCGv_i32 t, TCGv_i32 b)
+{
+ tcg_gen_ctpop_i32(t, b);
+ tcg_gen_and_i32(t, t, tcg_constant_i32(1));
+}
+
+/* vprtybd */
+static void gen_vprtyb_i64(TCGv_i64 t, TCGv_i64 b)
+{
+ tcg_gen_ctpop_i64(t, b);
+ tcg_gen_and_i64(t, t, tcg_constant_i64(1));
+}
+
+static bool do_vx_vprtyb(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, 0
+ };
+
+ static const GVecGen2 op[] = {
+ {
+ .fniv = gen_vprtyb_vec,
+ .fni4 = gen_vprtyb_i32,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vprtyb_vec,
+ .fni8 = gen_vprtyb_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_VPRTYBQ,
+ .vece = MO_128
+ },
+ };
+
+ REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+ REQUIRE_VECTOR(ctx);
+
+ tcg_gen_gvec_2(avr_full_offset(a->vrt), avr_full_offset(a->vrb),
+ 16, 16, &op[vece - MO_32]);
+
+ return true;
+}
+
+TRANS(VPRTYBW, do_vx_vprtyb, MO_32)
+TRANS(VPRTYBD, do_vx_vprtyb, MO_64)
+TRANS(VPRTYBQ, do_vx_vprtyb, MO_128)
static void gen_vsplt(DisasContext *ctx, int vece)
{
diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc
index 27908533dd..46a620a232 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -106,9 +106,6 @@ GEN_VXFORM_300(vsrv, 2, 28),
GEN_VXFORM_300(vslv, 2, 29),
GEN_VXFORM(vslo, 6, 16),
GEN_VXFORM(vsro, 6, 17),
-GEN_HANDLER_E_2(vprtybw, 0x4, 0x1, 0x18, 8, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E_2(vprtybd, 0x4, 0x1, 0x18, 9, 0, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E_2(vprtybq, 0x4, 0x1, 0x18, 10, 0, PPC_NONE, PPC2_ISA300),
GEN_VXFORM(xpnd04_1, 0, 22),
GEN_VXFORM_300(bcdsr, 0, 23),