diff options
Diffstat (limited to 'tcg/i386/tcg-target.inc.c')
-rw-r--r-- | tcg/i386/tcg-target.inc.c | 163 |
1 files changed, 144 insertions, 19 deletions
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index d5ed9f1ffd..aafd01cb49 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) -#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) @@ -370,6 +369,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVSLQ (0x63 | P_REXW) #define OPC_MOVZBL (0xb6 | P_EXT) #define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) +#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) +#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) #define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) #define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) #define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) @@ -421,6 +423,14 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) +#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) +#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) +#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) +#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) +#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) +#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) +#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) #define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) #define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) #define OPC_PSUBD (0xfa | P_EXT | P_DATA16) @@ -458,12 +468,21 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_UD2 (0x0b | P_EXT) #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) +#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) +#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) +#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) +#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW) #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) +#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) +#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW) +#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) +#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) +#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW) #define OPC_VZEROUPPER (0x77 | P_EXT) #define OPC_XCHG_ax_r32 (0x90) @@ -809,12 +828,12 @@ static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); } -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { int rexw = 0; if (arg == ret) { - return; + return true; } switch (type) { case TCG_TYPE_I64: @@ -852,18 +871,20 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) default: g_assert_not_reached(); } + return true; } -static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, +static const int avx2_dup_insn[4] = { + OPC_VPBROADCASTB, OPC_VPBROADCASTW, + OPC_VPBROADCASTD, OPC_VPBROADCASTQ, +}; + +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg a) { if (have_avx2) { - static const int dup_insn[4] = { - OPC_VPBROADCASTB, OPC_VPBROADCASTW, - OPC_VPBROADCASTD, OPC_VPBROADCASTQ, - }; int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); - tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a); + tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a); } else { switch (vece) { case MO_8: @@ -887,6 +908,39 @@ static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, g_assert_not_reached(); } } + return true; +} + +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg r, TCGReg base, intptr_t offset) +{ + if (have_avx2) { + int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); + tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l, + r, 0, base, offset); + } else { + switch (vece) { + case MO_64: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset); + break; + case MO_32: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); + break; + case MO_16: + tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + case MO_8: + tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + default: + g_assert_not_reached(); + } + } + return true; } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, @@ -909,16 +963,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } else if (have_avx2) { tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret); } new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); - } else if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); - new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret); + if (have_avx2) { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret); + } else { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); + } new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); - tcg_out_dup_vec(s, type, MO_32, ret, ret); } } @@ -2601,10 +2655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: - case INDEX_op_mov_vec: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_movi_i64: - case INDEX_op_dupi_vec: case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); @@ -2671,6 +2723,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, static int const umax_insn[4] = { OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2 }; + static int const shlv_insn[4] = { + /* TODO: AVX512 adds support for MO_16. */ + OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ + }; + static int const shrv_insn[4] = { + /* TODO: AVX512 adds support for MO_16. */ + OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ + }; + static int const sarv_insn[4] = { + /* TODO: AVX512 adds support for MO_16, MO_64. */ + OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2 + }; + static int const shls_insn[4] = { + OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ + }; + static int const shrs_insn[4] = { + OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ + }; + static int const sars_insn[4] = { + OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2 + }; + static int const abs_insn[4] = { + /* TODO: AVX512 adds support for MO_64. */ + OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2 + }; TCGType type = vecl + TCG_TYPE_V64; int insn, sub; @@ -2723,6 +2800,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_umax_vec: insn = umax_insn[vece]; goto gen_simd; + case INDEX_op_shlv_vec: + insn = shlv_insn[vece]; + goto gen_simd; + case INDEX_op_shrv_vec: + insn = shrv_insn[vece]; + goto gen_simd; + case INDEX_op_sarv_vec: + insn = sarv_insn[vece]; + goto gen_simd; + case INDEX_op_shls_vec: + insn = shls_insn[vece]; + goto gen_simd; + case INDEX_op_shrs_vec: + insn = shrs_insn[vece]; + goto gen_simd; + case INDEX_op_sars_vec: + insn = sars_insn[vece]; + goto gen_simd; case INDEX_op_x86_punpckl_vec: insn = punpckl_insn[vece]; goto gen_simd; @@ -2741,6 +2836,11 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, insn = OPC_PUNPCKLDQ; goto gen_simd; #endif + case INDEX_op_abs_vec: + insn = abs_insn[vece]; + a2 = a1; + a1 = 0; + goto gen_simd; gen_simd: tcg_debug_assert(insn != OPC_UD2); if (type == TCG_TYPE_V256) { @@ -2793,8 +2893,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_st_vec: tcg_out_st(s, type, a0, a1, a2); break; - case INDEX_op_dup_vec: - tcg_out_dup_vec(s, type, vece, a0, a1); + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); break; case INDEX_op_x86_shufps_vec: @@ -2837,6 +2937,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out8(s, a2); break; + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ default: g_assert_not_reached(); } @@ -3079,6 +3182,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_ld_vec: case INDEX_op_st_vec: + case INDEX_op_dupm_vec: return &x_r; case INDEX_op_add_vec: @@ -3096,6 +3200,12 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_umin_vec: case INDEX_op_smax_vec: case INDEX_op_umax_vec: + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + case INDEX_op_shls_vec: + case INDEX_op_shrs_vec: + case INDEX_op_sars_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3108,6 +3218,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_dup2_vec: #endif return &x_x_x; + case INDEX_op_abs_vec: case INDEX_op_dup_vec: case INDEX_op_shli_vec: case INDEX_op_shri_vec: @@ -3153,6 +3264,18 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) } return 1; + case INDEX_op_shls_vec: + case INDEX_op_shrs_vec: + return vece >= MO_16; + case INDEX_op_sars_vec: + return vece >= MO_16 && vece <= MO_32; + + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + return have_avx2 && vece >= MO_32; + case INDEX_op_sarv_vec: + return have_avx2 && vece == MO_32; + case INDEX_op_mul_vec: if (vece == MO_8) { /* We can expand the operation for MO_8. */ @@ -3173,6 +3296,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_umin_vec: case INDEX_op_umax_vec: return vece <= MO_32 ? 1 : -1; + case INDEX_op_abs_vec: + return vece <= MO_32; default: return 0; |