From 0aec21d8fa1be9a2b57b0e018b36ba566508d21c Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 29 Sep 2016 00:11:56 +0530 Subject: target-ppc: improve stxvw4x implementation Manipulate data and store 8bytes instead of 4bytes. Vector (32-bit elements): +----------+----------+----------+----------+ | 00112233 | 44556677 | 8899AABB | CCDDEEFF | +----------+----------+----------+----------+ Store results in following: Big-Endian Storage +-------------+-------------+-------------+-------------+ | 00 11 22 33 | 44 55 66 77 | 88 99 AA BB | CC DD EE FF | +-------------+-------------+-------------+-------------+ Little-Endian Storage +-------------+-------------+-------------+-------------+ | 33 22 11 00 | 77 66 55 44 | BB AA 99 88 | FF EE DD CC | +-------------+-------------+-------------+-------------+ Signed-off-by: Nikunj A Dadhania Reviewed-by: Richard Henderson Signed-off-by: David Gibson --- target-ppc/translate/vsx-impl.inc.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c index ec871b2430..a7e35d06e9 100644 --- a/target-ppc/translate/vsx-impl.inc.c +++ b/target-ppc/translate/vsx-impl.inc.c @@ -146,7 +146,8 @@ static void gen_stxvd2x(DisasContext *ctx) static void gen_stxvw4x(DisasContext *ctx) { - TCGv_i64 tmp; + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); TCGv EA; if (unlikely(!ctx->vsx_enabled)) { gen_exception(ctx, POWERPC_EXCP_VSXU); @@ -155,21 +156,25 @@ static void gen_stxvw4x(DisasContext *ctx) gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); gen_addr_reg_index(ctx, EA); - tmp = tcg_temp_new_i64(); - - tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); - - tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); + if (ctx->le_mode) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t0, xsh, 32); + tcg_gen_deposit_i64(t1, t0, xsh, 32, 32); + tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_shri_i64(t0, xsl, 32); + tcg_gen_deposit_i64(t1, t0, xsl, 32, 32); + tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } else { + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); + } tcg_temp_free(EA); - tcg_temp_free_i64(tmp); } #define MV_VSRW(name, tcgop1, tcgop2, target, source) \ -- cgit v1.2.3-55-g7522