summary | refs | log | tree | commit | diff | stats
path: root/tcg/i386
diff options
context:
space:
mode:
author: Richard Henderson, 2021-12-18 07:02:57 +0100
committer: Richard Henderson, 2022-03-04 19:50:41 +0100
commit: 4e73f842e8b4d90776fb25a5b699b807902f1881 (patch)
tree: 9b0b35c0f0d92e05bc1b7a6c7b15ba9f6b9fdbb5 /tcg/i386
parent: tcg/i386: Implement avx512 immediate sari shift (diff)
download: qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.tar.gz
qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.tar.xz
qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.zip
tcg/i386: Implement avx512 immediate rotate
AVX512VL has VPROLD and VPROLQ, layered onto the same opcode as PSHIFTD; both require EVEX encoding, and VPROLQ additionally requires W1.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg/i386')
-rw-r--r-- tcg/i386/tcg-target.c.inc | 15
-rw-r--r-- tcg/i386/tcg-target.h | 2
2 files changed, 14 insertions, 3 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index de01fbf40c..3a9f6a3360 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -362,7 +362,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
-#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
@@ -3000,6 +3000,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
insn = shift_imm_insn[vece];
}
sub = 4;
+ goto gen_shift;
+ case INDEX_op_rotli_vec:
+ insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */
+ if (vece == MO_64) {
+ insn |= P_VEXW;
+ }
+ sub = 1;
+ goto gen_shift;
gen_shift:
tcg_debug_assert(vece != MO_8);
if (type == TCG_TYPE_V256) {
@@ -3289,6 +3297,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
case INDEX_op_x86_psrldq_vec:
return C_O1_I1(x, x);
@@ -3310,11 +3319,13 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
return 1;
- case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
+ case INDEX_op_rotli_vec:
+ return have_avx512vl && vece >= MO_32 ? 1 : -1;
+
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
/* We must expand the operation for MO_8. */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 79af353860..23a8b2a8c8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -195,7 +195,7 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1