summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Peter Maydell 2020-06-16 11:32:28 +0200
committer Peter Maydell 2020-06-16 11:32:28 +0200
commit 0aad761fb0aed40c99039eacac470cbd03d07019 (patch)
tree d6a9e4f00b3504bb26a5ff4d0278ca75c90f0786
parent target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree (diff)
download qemu-0aad761fb0aed40c99039eacac470cbd03d07019.tar.gz
qemu-0aad761fb0aed40c99039eacac470cbd03d07019.tar.xz
qemu-0aad761fb0aed40c99039eacac470cbd03d07019.zip
target/arm: Convert Neon VEXT to decodetree
Convert the Neon VEXT insn to decodetree. Rather than keeping the old implementation, which used the fixed temporaries cpu_V0 and cpu_V1 and did the extraction with by-hand shift and logic ops, we use the TCG extract2 insn. We don't need to special-case 0 or 8 immediates any more, as the optimizer is smart enough to throw away the dead code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- target/arm/neon-dp.decode 8
-rw-r--r-- target/arm/translate-neon.inc.c 76
-rw-r--r-- target/arm/translate.c 58
3 files changed, 85 insertions, 57 deletions
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 9ff182f56d..26d6022016 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -413,7 +413,13 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
# return false for size==3.
######################################################################
{
- # 0b11 subgroup will go here
+ [
+ ##################################################################
+ # Miscellaneous size=0b11 insns
+ ##################################################################
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+ ]
# Subgroup for size != 0b11
[
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 48a0dee150..84bc2b239c 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -2812,3 +2812,79 @@ static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}
+
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
+{ /* Neon VEXT: store to Vd a 64-bit (q=0) or 128-bit (q=1) window that starts imm bytes into the Vm:Vn concatenation. */
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { /* reject when the CPU lacks the Neon feature */
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) { /* q is a 1-bit field: when set, any odd register number is rejected */
+ return false;
+ }
+
+ if (a->imm > 7 && !a->q) { /* the 64-bit form only accepts byte offsets 0..7 */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) { /* NOTE(review): presumably the helper has already raised the exception, so the insn is reported as handled - confirm */
+ return true;
+ }
+
+ if (!a->q) {
+ /* Extract 64 bits from <Vm:Vn> */
+ TCGv_i64 left, right, dest;
+
+ left = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ neon_load_reg64(right, a->vn); /* Vn is the low ("right") half of the pair */
+ neon_load_reg64(left, a->vm); /* Vm is the high ("left") half of the pair */
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8); /* imm counts bytes; scale it to a bit offset */
+ neon_store_reg64(dest, a->vd);
+
+ tcg_temp_free_i64(left);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(dest);
+ } else {
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
+ TCGv_i64 left, middle, right, destleft, destright;
+
+ left = tcg_temp_new_i64();
+ middle = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ destleft = tcg_temp_new_i64();
+ destright = tcg_temp_new_i64();
+
+ if (a->imm < 8) { /* window starts inside Vn: sources are Vn, Vn+1, Vm */
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(middle, a->vn + 1);
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
+ } else { /* window starts inside Vn+1: sources are Vn+1, Vm, Vm+1 and the offset is rebased by 8 bytes */
+ neon_load_reg64(right, a->vn + 1);
+ neon_load_reg64(middle, a->vm);
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
+ neon_load_reg64(left, a->vm + 1);
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
+ }
+
+ neon_store_reg64(destright, a->vd); /* low 64 bits of the result go to Vd */
+ neon_store_reg64(destleft, a->vd + 1); /* high 64 bits of the result go to Vd+1 */
+
+ tcg_temp_free_i64(destright);
+ tcg_temp_free_i64(destleft);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(middle);
+ tcg_temp_free_i64(left);
+ }
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 4d39bbf035..a0822dba5e 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5030,10 +5030,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int pass;
int u;
int vec_size;
- uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp5;
TCGv_ptr ptr1;
- TCGv_i64 tmp64;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -5076,60 +5074,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
} else { /* size == 3 */
if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
-
- if (imm > 7 && !q)
- return 1;
-
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
-
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
+ /* Extract: handled by decodetree */
+ return 1;
} else if ((insn & (1 << 11)) == 0) {
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);