summaryrefslogtreecommitdiffstats
path: root/tcg
diff options
context:
space:
mode:
author: Richard Henderson, 2021-12-18 20:04:34 +0100
committer: Richard Henderson, 2022-03-04 19:50:41 +0100
commit: 786c7ef3b7668f232b3fe31a6867a7c4d114780e (patch)
tree: a4f7e4a44ce8aedd7441becaa2b02d7052772a20 /tcg
parent: tcg/i386: Support avx512vbmi2 vector shift-double instructions (diff)
download: qemu-786c7ef3b7668f232b3fe31a6867a7c4d114780e.tar.gz
qemu-786c7ef3b7668f232b3fe31a6867a7c4d114780e.tar.xz
qemu-786c7ef3b7668f232b3fe31a6867a7c4d114780e.zip
tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double
While there are no specific 16-bit rotate instructions, there are
double-word shifts, which can perform the same operation.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r-- tcg/i386/tcg-target.c.inc | 18
1 file changed, 17 insertions, 1 deletion
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a39f890a7d..19cf124456 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3444,6 +3444,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
switch (vece) {
+ case MO_16:
+ return have_avx512vbmi2 ? -1 : 0;
case MO_32:
case MO_64:
return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
@@ -3588,6 +3590,12 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
return;
}
+ if (have_avx512vbmi2) {
+ vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
+ return;
+ }
+
t = tcg_temp_new_vec(type);
tcg_gen_shli_vec(vece, t, v1, imm);
tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
@@ -3618,8 +3626,16 @@ static void expand_vec_rotls(TCGType type, unsigned vece,
static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec sh, bool right)
{
- TCGv_vec t = tcg_temp_new_vec(type);
+ TCGv_vec t;
+ if (have_avx512vbmi2) {
+ vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
+ type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(sh));
+ return;
+ }
+
+ t = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(vece, t, 8 << vece);
tcg_gen_sub_vec(vece, t, t, sh);
if (right) {