diff options
Diffstat (limited to 'tcg/tcg-op-gvec.c')
-rw-r--r-- | tcg/tcg-op-gvec.c | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 049a55e700..3707c0effb 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -2694,6 +2694,74 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, } } +void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_8, 0xff << c); + + tcg_gen_shli_i64(d, a, c); + tcg_gen_shri_i64(a, a, 8 - c); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(a, a, ~mask); + tcg_gen_or_i64(d, d, a); +} + +void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_16, 0xffff << c); + + tcg_gen_shli_i64(d, a, c); + tcg_gen_shri_i64(a, a, 16 - c); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(a, a, ~mask); + tcg_gen_or_i64(d, d, a); +} + +void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen2i g[4] = { + { .fni8 = tcg_gen_vec_rotl8i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl8i, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = tcg_gen_vec_rotl16i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl16i, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotli_i32, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl32i, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotli_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl64i, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + if (shift == 0) { + tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz); + } else { + tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]); + } +} + +void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1), + oprsz, maxsz); +} + /* * Specialized generation vector shifts by a non-constant scalar. */ @@ -2908,6 +2976,28 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); } +void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_rotl_i32, + .fni8 = tcg_gen_rotl_i64, + .fniv_s = tcg_gen_rotls_vec, + .fniv_v = tcg_gen_rotlv_vec, + .fno = { + gen_helper_gvec_rotl8i, + gen_helper_gvec_rotl16i, + gen_helper_gvec_rotl32i, + gen_helper_gvec_rotl64i, + }, + .s_list = { INDEX_op_rotls_vec, 0 }, + .v_list = { INDEX_op_rotlv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + /* * Expand D = A << (B % element bits) * @@ -3103,6 +3193,128 @@ void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } +/* + * Similarly for rotates. + */ + +static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, t, (8 << vece) - 1); + tcg_gen_and_vec(vece, t, t, b); + tcg_gen_rotlv_vec(vece, d, a, t); + tcg_temp_free_vec(t); +} + +static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t, b, 31); + tcg_gen_rotl_i32(d, a, t); + tcg_temp_free_i32(t); +} + +static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(t, b, 63); + tcg_gen_rotl_i64(d, a, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotl_mod_i32, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotl_mod_i64, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, t, (8 << vece) - 1); + tcg_gen_and_vec(vece, t, t, b); + tcg_gen_rotrv_vec(vece, d, a, t); + tcg_temp_free_vec(t); +} + +static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t, b, 31); + tcg_gen_rotr_i32(d, a, t); + tcg_temp_free_i32(t); +} + +static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(t, b, 63); + tcg_gen_rotr_i64(d, a, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotr_mod_i32, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotr_mod_i64, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, TCGCond cond) |