Diffstat (limited to 'target/arm')

 target/arm/arm-powerctl.c  |   10
 target/arm/cpu.c           |  241
 target/arm/cpu.h           |  329
 target/arm/cpu64.c         |  224
 target/arm/helper-a64.c    |  253
 target/arm/helper-sve.h    |  389
 target/arm/helper.c        |  743
 target/arm/helper.h        |    2
 target/arm/internals.h     |   89
 target/arm/kvm.c           |   62
 target/arm/kvm32.c         |   13
 target/arm/kvm64.c         |   15
 target/arm/kvm_arm.h       |   28
 target/arm/machine.c       |   25
 target/arm/op_helper.c     |   30
 target/arm/sve_helper.c    | 1969
 target/arm/translate-a64.c |  759
 target/arm/translate-sve.c |  690
 target/arm/translate.c     | 1643
 target/arm/translate.h     |   22
 20 files changed, 4869 insertions(+), 2667 deletions(-)
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index ce55eeb682..2b856930fb 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -103,6 +103,16 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
     } else {
         /* Processor is not in secure mode */
         target_cpu->env.cp15.scr_el3 |= SCR_NS;
+
+        /*
+         * If QEMU is providing the equivalent of EL3 firmware, then we need
+         * to make sure a CPU targeting EL2 comes out of reset with a
+         * functional HVC insn.
+         */
+        if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3)
+            && info->target_el == 2) {
+            target_cpu->env.cp15.scr_el3 |= SCR_HCE;
+        }
     }
 
     /* We check if the started CPU is now at the correct level */
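The new SCR_HCE handling only matters when a secondary CPU is started with target_el == 2. A minimal caller sketch, assuming the arm_set_cpu_on() prototype from target/arm/arm-powerctl.h (cpuid, entry point, context id, target EL, AArch64 flag); the MPIDR value and entry address below are made up for illustration:

    /*
     * Sketch: how a PSCI/firmware model might bring a secondary core up
     * at EL2, which relies on SCR_EL3.HCE being set by the hunk above.
     */
    #include "arm-powerctl.h"

    static void start_secondary_at_el2(void)
    {
        uint64_t cpuid = 1;           /* MPIDR Aff0 = 1: assumed topology */
        uint64_t entry = 0x40080000;  /* hypothetical entry point */

        /* target_el = 2, target_aa64 = true */
        int ret = arm_set_cpu_on(cpuid, entry, 0, 2, true);
        if (ret != QEMU_ARM_POWERCTL_RET_SUCCESS) {
            /* e.g. invalid parameter if the CPU has no EL2 */
        }
    }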
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b5e61cc177..8f16e96b6c 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -144,9 +144,9 @@ static void arm_cpu_reset(CPUState *s)
     g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu);
 
     env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid;
-    env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0;
-    env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
-    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
+    env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0;
+    env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1;
+    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2;
 
     cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
     s->halted = cpu->start_powered_off;
@@ -814,7 +814,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
 
     /* Some features automatically imply others: */
     if (arm_feature(env, ARM_FEATURE_V8)) {
-        set_feature(env, ARM_FEATURE_V7VE);
+        if (arm_feature(env, ARM_FEATURE_M)) {
+            set_feature(env, ARM_FEATURE_V7);
+        } else {
+            set_feature(env, ARM_FEATURE_V7VE);
+        }
     }
     if (arm_feature(env, ARM_FEATURE_V7VE)) {
         /* v7 Virtualization Extensions. In real hardware this implies
@@ -825,7 +829,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
          * Security Extensions is ARM_FEATURE_EL3.
          */
-        set_feature(env, ARM_FEATURE_ARM_DIV);
+        assert(cpu_isar_feature(arm_div, cpu));
         set_feature(env, ARM_FEATURE_LPAE);
         set_feature(env, ARM_FEATURE_V7);
     }
@@ -850,20 +854,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
     if (arm_feature(env, ARM_FEATURE_V6)) {
         set_feature(env, ARM_FEATURE_V5);
-        set_feature(env, ARM_FEATURE_JAZELLE);
         if (!arm_feature(env, ARM_FEATURE_M)) {
+            assert(cpu_isar_feature(jazelle, cpu));
             set_feature(env, ARM_FEATURE_AUXCR);
         }
     }
     if (arm_feature(env, ARM_FEATURE_V5)) {
         set_feature(env, ARM_FEATURE_V4T);
     }
-    if (arm_feature(env, ARM_FEATURE_M)) {
-        set_feature(env, ARM_FEATURE_THUMB_DIV);
-    }
-    if (arm_feature(env, ARM_FEATURE_ARM_DIV)) {
-        set_feature(env, ARM_FEATURE_THUMB_DIV);
-    }
     if (arm_feature(env, ARM_FEATURE_VFP4)) {
         set_feature(env, ARM_FEATURE_VFP3);
         set_feature(env, ARM_FEATURE_VFP_FP16);
@@ -938,7 +936,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12].
          */
        cpu->id_pfr1 &= ~0xf0;
-        cpu->id_aa64pfr0 &= ~0xf000;
+        cpu->isar.id_aa64pfr0 &= ~0xf000;
     }
 
     if (!cpu->has_el2) {
@@ -955,7 +953,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          * registers if we don't have EL2. These are id_pfr1[15:12] and
          * id_aa64pfr0_el1[11:8].
          */
-        cpu->id_aa64pfr0 &= ~0xf00;
+        cpu->isar.id_aa64pfr0 &= ~0xf00;
         cpu->id_pfr1 &= ~0xf000;
     }
 
@@ -1084,11 +1082,16 @@ static void arm926_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
-    set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
     cpu->midr = 0x41069265;
     cpu->reset_fpsid = 0x41011090;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00090078;
+
+    /*
+     * ARMv5 does not have the ID_ISAR registers, but we can still
+     * set the field to indicate Jazelle support within QEMU.
+     */
+    cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
 }
 
 static void arm946_initfn(Object *obj)
@@ -1114,12 +1117,18 @@ static void arm1026_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_AUXCR);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
-    set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
     cpu->midr = 0x4106a262;
     cpu->reset_fpsid = 0x410110a0;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00090078;
     cpu->reset_auxcr = 1;
+
+    /*
+     * ARMv5 does not have the ID_ISAR registers, but we can still
+     * set the field to indicate Jazelle support within QEMU.
+     */
+    cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+
     {
         /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */
         ARMCPRegInfo ifar = {
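The FIELD()/FIELD_DP32()/FIELD_EX32() macros used above come from QEMU's hw/registerfields.h; FIELD(ID_ISAR1, JAZELLE, 28, 4) itself is declared in the cpu.h hunk later in this patch. A self-contained sketch of the deposit semantics, using simplified stand-ins for the real macros (which are built on deposit32/extract32):

    #include <stdint.h>
    #include <assert.h>

    /* What FIELD(ID_ISAR1, JAZELLE, 28, 4) expands to, in essence: */
    #define R_ID_ISAR1_JAZELLE_SHIFT  28
    #define R_ID_ISAR1_JAZELLE_LENGTH 4

    /* Simplified stand-in for FIELD_DP32(reg, ID_ISAR1, JAZELLE, val). */
    static inline uint32_t id_isar1_set_jazelle(uint32_t reg, uint32_t val)
    {
        uint32_t mask = ((1u << R_ID_ISAR1_JAZELLE_LENGTH) - 1)
                        << R_ID_ISAR1_JAZELLE_SHIFT;
        return (reg & ~mask) | ((val << R_ID_ISAR1_JAZELLE_SHIFT) & mask);
    }

    int main(void)
    {
        uint32_t id_isar1 = 0;   /* ARMv5 core: no architected ID_ISAR1 */
        id_isar1 = id_isar1_set_jazelle(id_isar1, 1);
        assert(((id_isar1 >> 28) & 0xf) == 1);   /* Jazelle now advertised */
        return 0;
    }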
@@ -1151,8 +1160,8 @@ static void arm1136_r2_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
     cpu->midr = 0x4107b362;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1162,11 +1171,11 @@ static void arm1136_r2_initfn(Object *obj)
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222110;
-    cpu->id_isar0 = 0x00140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231111;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231111;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1183,8 +1192,8 @@ static void arm1136_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
     cpu->midr = 0x4117b363;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1194,11 +1203,11 @@ static void arm1136_initfn(Object *obj)
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222110;
-    cpu->id_isar0 = 0x00140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231111;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231111;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1216,8 +1225,8 @@ static void arm1176_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     cpu->midr = 0x410fb767;
     cpu->reset_fpsid = 0x410120b5;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1227,11 +1236,11 @@ static void arm1176_initfn(Object *obj)
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222100;
-    cpu->id_isar0 = 0x0140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231121;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x01141;
+    cpu->isar.id_isar0 = 0x0140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231121;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x01141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1247,8 +1256,8 @@ static void arm11mpcore_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     cpu->midr = 0x410fb022;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
     cpu->id_pfr0 = 0x111;
     cpu->id_pfr1 = 0x1;
@@ -1257,11 +1266,11 @@ static void arm11mpcore_initfn(Object *obj)
     cpu->id_mmfr0 = 0x01100103;
     cpu->id_mmfr1 = 0x10020302;
     cpu->id_mmfr2 = 0x01222000;
-    cpu->id_isar0 = 0x00100011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11221011;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00100011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11221011;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 1;
 }
 
@@ -1290,13 +1299,13 @@ static void cortex_m3_initfn(Object *obj)
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x00000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01141110;
-    cpu->id_isar1 = 0x02111000;
-    cpu->id_isar2 = 0x21112231;
-    cpu->id_isar3 = 0x01111110;
-    cpu->id_isar4 = 0x01310102;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01141110;
+    cpu->isar.id_isar1 = 0x02111000;
+    cpu->isar.id_isar2 = 0x21112231;
+    cpu->isar.id_isar3 = 0x01111110;
+    cpu->isar.id_isar4 = 0x01310102;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
 }
 
 static void cortex_m4_initfn(Object *obj)
@@ -1317,13 +1326,13 @@ static void cortex_m4_initfn(Object *obj)
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x00000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01141110;
-    cpu->id_isar1 = 0x02111000;
-    cpu->id_isar2 = 0x21112231;
-    cpu->id_isar3 = 0x01111110;
-    cpu->id_isar4 = 0x01310102;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01141110;
+    cpu->isar.id_isar1 = 0x02111000;
+    cpu->isar.id_isar2 = 0x21112231;
+    cpu->isar.id_isar3 = 0x01111110;
+    cpu->isar.id_isar4 = 0x01310102;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
 }
 
 static void cortex_m33_initfn(Object *obj)
@@ -1346,13 +1355,13 @@ static void cortex_m33_initfn(Object *obj)
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x01000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01101110;
-    cpu->id_isar1 = 0x02212000;
-    cpu->id_isar2 = 0x20232232;
-    cpu->id_isar3 = 0x01111131;
-    cpu->id_isar4 = 0x01310132;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01101110;
+    cpu->isar.id_isar1 = 0x02212000;
+    cpu->isar.id_isar2 = 0x20232232;
+    cpu->isar.id_isar3 = 0x01111131;
+    cpu->isar.id_isar4 = 0x01310132;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
     cpu->clidr = 0x00000000;
     cpu->ctr = 0x8000c000;
 }
 
@@ -1384,8 +1393,6 @@ static void cortex_r5_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
 
     set_feature(&cpu->env, ARM_FEATURE_V7);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_PMSA);
     cpu->midr = 0x411fc153; /* r1p3 */
@@ -1397,13 +1404,13 @@ static void cortex_r5_initfn(Object *obj)
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x01200000;
     cpu->id_mmfr3 = 0x0211;
-    cpu->id_isar0 = 0x2101111;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232141;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x0010142;
-    cpu->id_isar5 = 0x0;
-    cpu->id_isar6 = 0x0;
+    cpu->isar.id_isar0 = 0x02101111;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232141;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x0010142;
+    cpu->isar.id_isar5 = 0x0;
+    cpu->isar.id_isar6 = 0x0;
     cpu->mp_is_up = true;
     cpu->pmsav7_dregion = 16;
     define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
@@ -1438,8 +1445,8 @@ static void cortex_a8_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     cpu->midr = 0x410fc080;
     cpu->reset_fpsid = 0x410330c0;
-    cpu->mvfr0 = 0x11110222;
-    cpu->mvfr1 = 0x00011111;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x00011111;
     cpu->ctr = 0x82048004;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x1031;
@@ -1450,11 +1457,11 @@ static void cortex_a8_initfn(Object *obj)
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01202000;
     cpu->id_mmfr3 = 0x11;
-    cpu->id_isar0 = 0x00101111;
-    cpu->id_isar1 = 0x12112111;
-    cpu->id_isar2 = 0x21232031;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x00111142;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x12112111;
+    cpu->isar.id_isar2 = 0x21232031;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
     cpu->dbgdidr = 0x15141000;
     cpu->clidr = (1 << 27) | (2 << 24) | 3;
     cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
@@ -1512,8 +1519,8 @@ static void cortex_a9_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_CBAR);
     cpu->midr = 0x410fc090;
     cpu->reset_fpsid = 0x41033090;
-    cpu->mvfr0 = 0x11110222;
-    cpu->mvfr1 = 0x01111111;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x01111111;
     cpu->ctr = 0x80038003;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x1031;
@@ -1524,11 +1531,11 @@ static void cortex_a9_initfn(Object *obj)
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01230000;
     cpu->id_mmfr3 = 0x00002111;
-    cpu->id_isar0 = 0x00101111;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x00111142;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
     cpu->dbgdidr = 0x35141000;
     cpu->clidr = (1 << 27) | (1 << 24) | 3;
     cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
@@ -1573,8 +1580,8 @@ static void cortex_a7_initfn(Object *obj)
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7;
     cpu->midr = 0x410fc075;
     cpu->reset_fpsid = 0x41023075;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x11111111;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
     cpu->ctr = 0x84448003;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x00001131;
@@ -1587,11 +1594,14 @@ static void cortex_a7_initfn(Object *obj)
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01240000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x01101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x10011142;
+    /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
+     * table 4-41 gives 0x02101110, which includes the arm div insns.
+     */
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
     cpu->dbgdidr = 0x3515f005;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
@@ -1616,8 +1626,8 @@ static void cortex_a15_initfn(Object *obj)
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
     cpu->midr = 0x412fc0f1;
     cpu->reset_fpsid = 0x410430f0;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x11111111;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
     cpu->ctr = 0x8444c004;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x00001131;
@@ -1630,11 +1640,11 @@ static void cortex_a15_initfn(Object *obj)
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01240000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x10011142;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
     cpu->dbgdidr = 0x3515f021;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
@@ -1827,17 +1837,26 @@ static void arm_max_initfn(Object *obj)
         cortex_a15_initfn(obj);
 #ifdef CONFIG_USER_ONLY
         /* We don't set these in system emulation mode for the moment,
-         * since we don't correctly set the ID registers to advertise them,
+         * since we don't correctly set (all of) the ID registers to
+         * advertise them.
          */
         set_feature(&cpu->env, ARM_FEATURE_V8);
-        set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-        set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-        set_feature(&cpu->env, ARM_FEATURE_CRC);
-        set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
-        set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
+        {
+            uint32_t t;
+
+            t = cpu->isar.id_isar5;
+            t = FIELD_DP32(t, ID_ISAR5, AES, 2);
+            t = FIELD_DP32(t, ID_ISAR5, SHA1, 1);
+            t = FIELD_DP32(t, ID_ISAR5, SHA2, 1);
+            t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
+            t = FIELD_DP32(t, ID_ISAR5, RDM, 1);
+            t = FIELD_DP32(t, ID_ISAR5, VCMA, 1);
+            cpu->isar.id_isar5 = t;
+
+            t = cpu->isar.id_isar6;
+            t = FIELD_DP32(t, ID_ISAR6, DP, 1);
+            cpu->isar.id_isar6 = t;
+        }
 #endif
     }
 }
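Note that a 4-bit ID field encodes graded support rather than a boolean: ID_ISAR5.AES = 2 above advertises AES plus the 64x64-to-128 PMULL forms, just as SHA2 = 2 on the AArch64 side means SHA-512. A sketch of reading one field at two thresholds, mirroring the paired isar_feature_aa32_aes/isar_feature_aa32_pmull tests in the cpu.h hunk below (field offset per the FIELD declarations in this patch):

    /* One field, two feature levels: values are cumulative. */
    static inline bool aa32_has_aes(uint32_t id_isar5)
    {
        return ((id_isar5 >> 4) & 0xf) != 0;   /* AES field, any level */
    }

    static inline bool aa32_has_pmull(uint32_t id_isar5)
    {
        return ((id_isar5 >> 4) & 0xf) > 1;    /* level 2 adds PMULL */
    }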
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 65c0fa0a65..8e6779936e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -56,6 +56,7 @@
 #define EXCP_SEMIHOST       16   /* semihosting call */
 #define EXCP_NOCP           17   /* v7M NOCP UsageFault */
 #define EXCP_INVSTATE       18   /* v7M INVSTATE UsageFault */
+#define EXCP_STKOF          19   /* v8M STKOF UsageFault */
 /* NB: add new EXCP_ defines to the array in arm_log_exception() too */
 
 #define ARMV7M_EXCP_RESET   1
@@ -530,6 +531,13 @@ typedef struct CPUARMState {
          */
     } exception;
 
+    /* Information associated with an SError */
+    struct {
+        uint8_t pending;
+        uint8_t has_esr;
+        uint64_t esr;
+    } serror;
+
     /* Thumb-2 EE state.  */
     uint32_t teecr;
     uint32_t teehbr;
@@ -668,6 +676,8 @@ typedef enum ARMPSCIState {
     PSCI_ON_PENDING = 2
 } ARMPSCIState;
 
+typedef struct ARMISARegisters ARMISARegisters;
+
 /**
  * ARMCPU:
  * @env: #CPUARMState
@@ -787,13 +797,28 @@ struct ARMCPU {
      * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix
      * is used for reset values of non-constant registers; no reset_
      * prefix means a constant register.
+     * Some of these registers are split out into a substructure that
+     * is shared with the translators to control the ISA.
      */
+    struct ARMISARegisters {
+        uint32_t id_isar0;
+        uint32_t id_isar1;
+        uint32_t id_isar2;
+        uint32_t id_isar3;
+        uint32_t id_isar4;
+        uint32_t id_isar5;
+        uint32_t id_isar6;
+        uint32_t mvfr0;
+        uint32_t mvfr1;
+        uint32_t mvfr2;
+        uint64_t id_aa64isar0;
+        uint64_t id_aa64isar1;
+        uint64_t id_aa64pfr0;
+        uint64_t id_aa64pfr1;
+    } isar;
     uint32_t midr;
     uint32_t revidr;
     uint32_t reset_fpsid;
-    uint32_t mvfr0;
-    uint32_t mvfr1;
-    uint32_t mvfr2;
     uint32_t ctr;
     uint32_t reset_sctlr;
     uint32_t id_pfr0;
@@ -807,21 +832,10 @@ struct ARMCPU {
     uint32_t id_mmfr2;
     uint32_t id_mmfr3;
     uint32_t id_mmfr4;
-    uint32_t id_isar0;
-    uint32_t id_isar1;
-    uint32_t id_isar2;
-    uint32_t id_isar3;
-    uint32_t id_isar4;
-    uint32_t id_isar5;
-    uint32_t id_isar6;
-    uint64_t id_aa64pfr0;
-    uint64_t id_aa64pfr1;
     uint64_t id_aa64dfr0;
     uint64_t id_aa64dfr1;
     uint64_t id_aa64afr0;
     uint64_t id_aa64afr1;
-    uint64_t id_aa64isar0;
-    uint64_t id_aa64isar1;
     uint64_t id_aa64mmfr0;
     uint64_t id_aa64mmfr1;
     uint32_t dbgdidr;
@@ -910,12 +924,23 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
+void aarch64_sve_change_el(CPUARMState *env, int old_el,
+                           int new_el, bool el0_a64);
+#else
+static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
+static inline void aarch64_sve_change_el(CPUARMState *env, int o,
+                                         int n, bool a)
+{ }
 #endif
 
 target_ulong do_arm_semihosting(CPUARMState *env);
 void aarch64_sync_32_to_64(CPUARMState *env);
 void aarch64_sync_64_to_32(CPUARMState *env);
 
+int fp_exception_el(CPUARMState *env, int cur_el);
+int sve_exception_el(CPUARMState *env, int cur_el);
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
+
 static inline bool is_a64(CPUARMState *env)
 {
     return env->aarch64;
@@ -1336,8 +1361,10 @@ FIELD(V7M_CCR, UNALIGN_TRP, 3, 1)
 FIELD(V7M_CCR, DIV_0_TRP, 4, 1)
 FIELD(V7M_CCR, BFHFNMIGN, 8, 1)
 FIELD(V7M_CCR, STKALIGN, 9, 1)
+FIELD(V7M_CCR, STKOFHFNMIGN, 10, 1)
 FIELD(V7M_CCR, DC, 16, 1)
 FIELD(V7M_CCR, IC, 17, 1)
+FIELD(V7M_CCR, BP, 18, 1)
 
 /* V7M SCR bits */
 FIELD(V7M_SCR, SLEEPONEXIT, 1, 1)
@@ -1378,6 +1405,7 @@ FIELD(V7M_CFSR, UNDEFINSTR, 16 + 0, 1)
 FIELD(V7M_CFSR, INVSTATE, 16 + 1, 1)
 FIELD(V7M_CFSR, INVPC, 16 + 2, 1)
 FIELD(V7M_CFSR, NOCP, 16 + 3, 1)
+FIELD(V7M_CFSR, STKOF, 16 + 4, 1)
 FIELD(V7M_CFSR, UNALIGNED, 16 + 8, 1)
 FIELD(V7M_CFSR, DIVBYZERO, 16 + 9, 1)
 
@@ -1428,6 +1456,104 @@ FIELD(V7M_CSSELR, LEVEL, 1, 3)
  */
 FIELD(V7M_CSSELR, INDEX, 0, 4)
 
+/*
+ * System register ID fields.
+ */
+FIELD(ID_ISAR0, SWAP, 0, 4)
+FIELD(ID_ISAR0, BITCOUNT, 4, 4)
+FIELD(ID_ISAR0, BITFIELD, 8, 4)
+FIELD(ID_ISAR0, CMPBRANCH, 12, 4)
+FIELD(ID_ISAR0, COPROC, 16, 4)
+FIELD(ID_ISAR0, DEBUG, 20, 4)
+FIELD(ID_ISAR0, DIVIDE, 24, 4)
+
+FIELD(ID_ISAR1, ENDIAN, 0, 4)
+FIELD(ID_ISAR1, EXCEPT, 4, 4)
+FIELD(ID_ISAR1, EXCEPT_AR, 8, 4)
+FIELD(ID_ISAR1, EXTEND, 12, 4)
+FIELD(ID_ISAR1, IFTHEN, 16, 4)
+FIELD(ID_ISAR1, IMMEDIATE, 20, 4)
+FIELD(ID_ISAR1, INTERWORK, 24, 4)
+FIELD(ID_ISAR1, JAZELLE, 28, 4)
+
+FIELD(ID_ISAR2, LOADSTORE, 0, 4)
+FIELD(ID_ISAR2, MEMHINT, 4, 4)
+FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4)
+FIELD(ID_ISAR2, MULT, 12, 4)
+FIELD(ID_ISAR2, MULTS, 16, 4)
+FIELD(ID_ISAR2, MULTU, 20, 4)
+FIELD(ID_ISAR2, PSR_AR, 24, 4)
+FIELD(ID_ISAR2, REVERSAL, 28, 4)
+
+FIELD(ID_ISAR3, SATURATE, 0, 4)
+FIELD(ID_ISAR3, SIMD, 4, 4)
+FIELD(ID_ISAR3, SVC, 8, 4)
+FIELD(ID_ISAR3, SYNCHPRIM, 12, 4)
+FIELD(ID_ISAR3, TABBRANCH, 16, 4)
+FIELD(ID_ISAR3, T32COPY, 20, 4)
+FIELD(ID_ISAR3, TRUENOP, 24, 4)
+FIELD(ID_ISAR3, T32EE, 28, 4)
+
+FIELD(ID_ISAR4, UNPRIV, 0, 4)
+FIELD(ID_ISAR4, WITHSHIFTS, 4, 4)
+FIELD(ID_ISAR4, WRITEBACK, 8, 4)
+FIELD(ID_ISAR4, SMC, 12, 4)
+FIELD(ID_ISAR4, BARRIER, 16, 4)
+FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4)
+FIELD(ID_ISAR4, PSR_M, 24, 4)
+FIELD(ID_ISAR4, SWP_FRAC, 28, 4)
+
+FIELD(ID_ISAR5, SEVL, 0, 4)
+FIELD(ID_ISAR5, AES, 4, 4)
+FIELD(ID_ISAR5, SHA1, 8, 4)
+FIELD(ID_ISAR5, SHA2, 12, 4)
+FIELD(ID_ISAR5, CRC32, 16, 4)
+FIELD(ID_ISAR5, RDM, 24, 4)
+FIELD(ID_ISAR5, VCMA, 28, 4)
+
+FIELD(ID_ISAR6, JSCVT, 0, 4)
+FIELD(ID_ISAR6, DP, 4, 4)
+FIELD(ID_ISAR6, FHM, 8, 4)
+FIELD(ID_ISAR6, SB, 12, 4)
+FIELD(ID_ISAR6, SPECRES, 16, 4)
+
+FIELD(ID_AA64ISAR0, AES, 4, 4)
+FIELD(ID_AA64ISAR0, SHA1, 8, 4)
+FIELD(ID_AA64ISAR0, SHA2, 12, 4)
+FIELD(ID_AA64ISAR0, CRC32, 16, 4)
+FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
+FIELD(ID_AA64ISAR0, RDM, 28, 4)
+FIELD(ID_AA64ISAR0, SHA3, 32, 4)
+FIELD(ID_AA64ISAR0, SM3, 36, 4)
+FIELD(ID_AA64ISAR0, SM4, 40, 4)
+FIELD(ID_AA64ISAR0, DP, 44, 4)
+FIELD(ID_AA64ISAR0, FHM, 48, 4)
+FIELD(ID_AA64ISAR0, TS, 52, 4)
+FIELD(ID_AA64ISAR0, TLB, 56, 4)
+FIELD(ID_AA64ISAR0, RNDR, 60, 4)
+
+FIELD(ID_AA64ISAR1, DPB, 0, 4)
+FIELD(ID_AA64ISAR1, APA, 4, 4)
+FIELD(ID_AA64ISAR1, API, 8, 4)
+FIELD(ID_AA64ISAR1, JSCVT, 12, 4)
+FIELD(ID_AA64ISAR1, FCMA, 16, 4)
+FIELD(ID_AA64ISAR1, LRCPC, 20, 4)
+FIELD(ID_AA64ISAR1, GPA, 24, 4)
+FIELD(ID_AA64ISAR1, GPI, 28, 4)
+FIELD(ID_AA64ISAR1, FRINTTS, 32, 4)
+FIELD(ID_AA64ISAR1, SB, 36, 4)
+FIELD(ID_AA64ISAR1, SPECRES, 40, 4)
+
+FIELD(ID_AA64PFR0, EL0, 0, 4)
+FIELD(ID_AA64PFR0, EL1, 4, 4)
+FIELD(ID_AA64PFR0, EL2, 8, 4)
+FIELD(ID_AA64PFR0, EL3, 12, 4)
+FIELD(ID_AA64PFR0, FP, 16, 4)
+FIELD(ID_AA64PFR0, ADVSIMD, 20, 4)
+FIELD(ID_AA64PFR0, GIC, 24, 4)
+FIELD(ID_AA64PFR0, RAS, 28, 4)
+FIELD(ID_AA64PFR0, SVE, 32, 4)
+
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
 
 /* If adding a feature bit which corresponds to a Linux ELF
@@ -1447,7 +1573,6 @@ enum arm_features {
     ARM_FEATURE_VFP3,
     ARM_FEATURE_VFP_FP16,
     ARM_FEATURE_NEON,
-    ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */
     ARM_FEATURE_M, /* Microcontroller profile.  */
     ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling.  */
     ARM_FEATURE_THUMB2EE,
@@ -1457,7 +1582,6 @@ enum arm_features {
     ARM_FEATURE_V5,
     ARM_FEATURE_STRONGARM,
     ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
-    ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
     ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
     ARM_FEATURE_GENERIC_TIMER,
     ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */
@@ -1470,30 +1594,15 @@ enum arm_features {
     ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
     ARM_FEATURE_V8,
     ARM_FEATURE_AARCH64, /* supports 64 bit mode */
-    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
     ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
     ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
     ARM_FEATURE_EL2, /* has EL2 Virtualization support */
     ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
-    ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
     ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */
     ARM_FEATURE_PMU, /* has PMU support */
     ARM_FEATURE_VBAR, /* has cp15 VBAR */
     ARM_FEATURE_M_SECURITY, /* M profile Security Extension */
-    ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */
-    ARM_FEATURE_SVE, /* has Scalable Vector Extension */
-    ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
-    ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
-    ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */
-    ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
-    ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions.  */
     ARM_FEATURE_M_MAIN, /* M profile Main Extension */
 };
 
@@ -2842,6 +2951,9 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
 /* For M profile only, Handler (ie not Thread) mode */
 #define ARM_TBFLAG_HANDLER_SHIFT    21
 #define ARM_TBFLAG_HANDLER_MASK     (1 << ARM_TBFLAG_HANDLER_SHIFT)
+/* For M profile only, whether we should generate stack-limit checks */
+#define ARM_TBFLAG_STACKCHECK_SHIFT 22
+#define ARM_TBFLAG_STACKCHECK_MASK  (1 << ARM_TBFLAG_STACKCHECK_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_TBI0_SHIFT 0        /* TBI0 for EL0/1 or TBI for EL2/3 */
@@ -2884,6 +2996,8 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
     (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT)
 #define ARM_TBFLAG_HANDLER(F) \
     (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT)
+#define ARM_TBFLAG_STACKCHECK(F) \
+    (((F) & ARM_TBFLAG_STACKCHECK_MASK) >> ARM_TBFLAG_STACKCHECK_SHIFT)
 #define ARM_TBFLAG_TBI0(F) \
     (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
 #define ARM_TBFLAG_TBI1(F) \
@@ -3040,4 +3154,157 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
 /* Shared between translate-sve.c and sve_helper.c.  */
 extern const uint64_t pred_esz_masks[4];
 
+/*
+ * 32-bit feature tests via id registers.
+ */
+static inline bool isar_feature_thumb_div(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
+}
+
+static inline bool isar_feature_arm_div(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
+}
+
+static inline bool isar_feature_jazelle(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
+}
+
+static inline bool isar_feature_aa32_aes(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0;
+}
+
+static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1;
+}
+
+static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0;
+}
+
+static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
+}
+
+static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
+}
+
+static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
+{
+    /*
+     * This is a placeholder for use by VCMA until the rest of
+     * the ARMv8.2-FP16 extension is implemented for aa32 mode.
+     * At which point we can properly set and check MVFR1.FPHP.
+     */
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+}
+
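The aa32_fp16_arith placeholder keys off ID_AA64PFR0.FP deliberately: in that field 0 means FP without half-precision arithmetic, 1 means FP including ARMv8.2-FP16, and 0xf means no FP at all, so testing for exactly 1 is the correct check. An illustrative decoder (the helper name is invented for this sketch):

    /*
     * Illustrative decoding of the ID_AA64PFR0.FP field at bits [19:16]:
     *   0x0 -> FP present, no half-precision arithmetic
     *   0x1 -> FP present, including ARMv8.2-FP16
     *   0xf -> FP not implemented
     */
    static const char *describe_fp(uint64_t id_aa64pfr0)
    {
        unsigned fp = (id_aa64pfr0 >> 16) & 0xf;

        switch (fp) {
        case 0x0: return "fp";
        case 0x1: return "fp+fp16";
        case 0xf: return "none";
        default:  return "reserved";
        }
    }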
+/*
+ * 64-bit feature tests via id registers.
+ */
+static inline bool isar_feature_aa64_aes(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0;
+}
+
+static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1;
+}
+
+static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1;
+}
+
+static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0;
+}
+
+static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0;
+}
+
+static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0;
+}
+
+static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
+}
+
+static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
+}
+
+static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
+{
+    /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+}
+
+static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
+}
+
+/*
+ * Forward to the above feature tests given an ARMCPU pointer.
+ */
+#define cpu_isar_feature(name, cpu) \
+    ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); })
+
 #endif
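cpu_isar_feature() is a statement expression so that its cpu argument is evaluated exactly once. Call sites then read like the assertion added to arm_cpu_realizefn() in the cpu.c hunk above; a minimal sketch, assuming a valid ARMCPU pointer:

    /*
     * Sketch: gating behaviour on ID-register-derived features rather
     * than the removed ARM_FEATURE_* bits.
     */
    static void example_feature_checks(ARMCPU *cpu)
    {
        if (cpu_isar_feature(arm_div, cpu)) {
            /* SDIV/UDIV available in the Arm (A32) encoding */
        }
        if (cpu_isar_feature(aa64_sve, cpu)) {
            /* SVE advertised via ID_AA64PFR0.SVE, replacing ARM_FEATURE_SVE */
        }
    }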
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 800bff780e..873f059bf2 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -51,7 +51,7 @@ static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 #endif
 
-static const ARMCPRegInfo cortex_a57_a53_cp_reginfo[] = {
+static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = {
 #ifndef CONFIG_USER_ONLY
     { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
@@ -109,11 +109,6 @@ static void aarch64_a57_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-    set_feature(&cpu->env, ARM_FEATURE_CRC);
     set_feature(&cpu->env, ARM_FEATURE_EL2);
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     set_feature(&cpu->env, ARM_FEATURE_PMU);
@@ -121,9 +116,9 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->midr = 0x411fd070;
     cpu->revidr = 0x00000000;
     cpu->reset_fpsid = 0x41034070;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x12111111;
-    cpu->mvfr2 = 0x00000043;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
     cpu->ctr = 0x8444c004;
     cpu->reset_sctlr = 0x00c50838;
     cpu->id_pfr0 = 0x00000131;
@@ -134,18 +129,18 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01260000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232042;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x00011142;
-    cpu->id_isar5 = 0x00011121;
-    cpu->id_isar6 = 0;
-    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_isar6 = 0;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
     cpu->pmceid0 = 0x00000000;
     cpu->pmceid1 = 0x00000000;
-    cpu->id_aa64isar0 = 0x00011120;
+    cpu->isar.id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001124;
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
@@ -156,7 +151,7 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
     cpu->gic_vprebits = 5;
-    define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo);
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
 }
 
 static void aarch64_a53_initfn(Object *obj)
@@ -170,11 +165,6 @@ static void aarch64_a53_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-    set_feature(&cpu->env, ARM_FEATURE_CRC);
     set_feature(&cpu->env, ARM_FEATURE_EL2);
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     set_feature(&cpu->env, ARM_FEATURE_PMU);
@@ -182,9 +172,9 @@ static void aarch64_a53_initfn(Object *obj)
     cpu->midr = 0x410fd034;
     cpu->revidr = 0x00000000;
     cpu->reset_fpsid = 0x41034070;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x12111111;
-    cpu->mvfr2 = 0x00000043;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
     cpu->ctr = 0x84448004; /* L1Ip = VIPT */
     cpu->reset_sctlr = 0x00c50838;
     cpu->id_pfr0 = 0x00000131;
@@ -195,16 +185,16 @@ static void aarch64_a53_initfn(Object *obj)
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01260000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232042;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x00011142;
-    cpu->id_isar5 = 0x00011121;
-    cpu->id_isar6 = 0;
-    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_isar6 = 0;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
-    cpu->id_aa64isar0 = 0x00011120;
+    cpu->isar.id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
@@ -215,7 +205,61 @@ static void aarch64_a53_initfn(Object *obj)
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
     cpu->gic_vprebits = 5;
-    define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo);
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+}
+
+static void aarch64_a72_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a72";
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_VFP4);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_EL2);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    set_feature(&cpu->env, ARM_FEATURE_PMU);
+    cpu->midr = 0x410fd083;
+    cpu->revidr = 0x00000000;
+    cpu->reset_fpsid = 0x41034080;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
+    cpu->ctr = 0x8444c004;
+    cpu->reset_sctlr = 0x00c50838;
+    cpu->id_pfr0 = 0x00000131;
+    cpu->id_pfr1 = 0x00011011;
+    cpu->id_dfr0 = 0x03010066;
+    cpu->id_afr0 = 0x00000000;
+    cpu->id_mmfr0 = 0x10201105;
+    cpu->id_mmfr1 = 0x40000000;
+    cpu->id_mmfr2 = 0x01260000;
+    cpu->id_mmfr3 = 0x02102211;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
+    cpu->id_aa64dfr0 = 0x10305106;
+    cpu->pmceid0 = 0x00000000;
+    cpu->pmceid1 = 0x00000000;
+    cpu->isar.id_aa64isar0 = 0x00011120;
+    cpu->id_aa64mmfr0 = 0x00001124;
+    cpu->dbgdidr = 0x3516d000;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+    cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
+    cpu->dcz_blocksize = 4; /* 64 bytes */
+    cpu->gic_num_lrs = 4;
+    cpu->gic_vpribits = 5;
+    cpu->gic_vprebits = 5;
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
 }
 
 static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name,
@@ -253,24 +297,55 @@ static void aarch64_max_initfn(Object *obj)
     if (kvm_enabled()) {
         kvm_arm_set_cpu_features_from_host(cpu);
     } else {
+        uint64_t t;
+        uint32_t u;
         aarch64_a57_initfn(obj);
-#ifdef CONFIG_USER_ONLY
-        /* We don't set these in system emulation mode for the moment,
-         * since we don't correctly set the ID registers to advertise them,
-         * and in some cases they're only available in AArch64 and not AArch32,
-         * whereas the architecture requires them to be present in both if
-         * present in either.
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */
+        t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2);
+        t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
+        t = FIELD_DP64(t, ID_AA64PFR0, FP, 1);
+        t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);
+        cpu->isar.id_aa64pfr0 = t;
+
+        /* Replicate the same data to the 32-bit id registers.  */
+        u = cpu->isar.id_isar5;
+        u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */
+        u = FIELD_DP32(u, ID_ISAR5, SHA1, 1);
+        u = FIELD_DP32(u, ID_ISAR5, SHA2, 1);
+        u = FIELD_DP32(u, ID_ISAR5, CRC32, 1);
+        u = FIELD_DP32(u, ID_ISAR5, RDM, 1);
+        u = FIELD_DP32(u, ID_ISAR5, VCMA, 1);
+        cpu->isar.id_isar5 = u;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, DP, 1);
+        cpu->isar.id_isar6 = u;
+
+        /*
+         * FIXME: We do not yet support ARMv8.2-fp16 for AArch32,
+         * so do not set MVFR1.FPHP. Strictly speaking this is not legal,
+         * but it is also not legal to enable SVE without support for FP16,
+         * and enabling SVE in system mode is more useful in the short term.
          */
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA512);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA3);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SM3);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
-        set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
-        set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
-        set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
-        set_feature(&cpu->env, ARM_FEATURE_SVE);
+
+#ifdef CONFIG_USER_ONLY
         /* For usermode -cpu max we can use a larger and more efficient DCZ
         * blocksize since we don't have to follow what the hardware does.
         */
@@ -293,6 +368,7 @@ typedef struct ARMCPUInfo {
 static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
     { .name = "cortex-a53",         .initfn = aarch64_a53_initfn },
+    { .name = "cortex-a72",         .initfn = aarch64_a72_initfn },
     { .name = "max",                .initfn = aarch64_max_initfn },
     { .name = NULL }
 };
@@ -410,45 +486,3 @@ static void aarch64_cpu_register_types(void)
 }
 
 type_init(aarch64_cpu_register_types)
-
-/* The manual says that when SVE is enabled and VQ is widened the
- * implementation is allowed to zero the previously inaccessible
- * portion of the registers.  The corollary to that is that when
- * SVE is enabled and VQ is narrowed we are also allowed to zero
- * the now inaccessible portion of the registers.
- *
- * The intent of this is that no predicate bit beyond VQ is ever set.
- * Which means that some operations on predicate registers themselves
- * may operate on full uint64_t or even unrolled across the maximum
- * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
- * may well be cheaper than conditionals to restrict the operation
- * to the relevant portion of a uint16_t[16].
- *
- * TODO: Need to call this for changes to the real system registers
- * and EL state changes.
- */
-void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
-{
-    int i, j;
-    uint64_t pmask;
-
-    assert(vq >= 1 && vq <= ARM_MAX_VQ);
-    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
-
-    /* Zap the high bits of the zregs.  */
-    for (i = 0; i < 32; i++) {
-        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
-    }
-
-    /* Zap the high bits of the pregs and ffr.  */
-    pmask = 0;
-    if (vq & 3) {
-        pmask = ~(-1ULL << (16 * (vq & 3)));
-    }
-    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
-        for (i = 0; i < 17; ++i) {
-            env->vfp.pregs[i].p[j] &= pmask;
-        }
-        pmask = 0;
-    }
-}
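aarch64_sve_narrow_vq() is deleted here but moves elsewhere in the series (see the helper.c entry in the diffstat) rather than disappearing. Its masking packs 16 predicate bits per vector quadword, four quadwords per uint64_t, so when VQ is not a multiple of 4 the first partial word keeps only its low 16 * (vq & 3) bits. A standalone worked example of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Worked example of the pregs masking above: for vq = 5,
     * vq & 3 == 1, so the first partial word keeps bits [15:0].
     */
    int main(void)
    {
        unsigned vq = 5;
        uint64_t pmask = 0;

        if (vq & 3) {
            pmask = ~(-1ULL << (16 * (vq & 3)));
        }
        printf("partial word keeps: %#llx\n",
               (unsigned long long)pmask);   /* prints 0xffff */
        return 0;
    }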
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7f6ad3000b..61799d20e1 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -30,6 +30,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 #include "fpu/softfloat.h"
 #include <zlib.h> /* For crc32 */
@@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
-/* Returns 0 on success; 1 otherwise.  */
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
 {
-    Int128 oldv, cmpv, newv;
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
     bool success;
 
-    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-    newv = int128_make128(new_lo, new_hi);
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
-
 #ifdef CONFIG_USER_ONLY
-    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o0 = ldq_le_p(haddr + 0);
+    o1 = ldq_le_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_le_p(haddr + 0, int128_getlo(newv));
+        stq_le_p(haddr + 1, int128_gethi(newv));
+    }
+    helper_retaddr = 0;
 #else
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
-
-    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
-    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
-    oldv = int128_make128(o0, o1);
-
-    success = int128_eq(oldv, cmpv);
-    if (success) {
-        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
-        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
-    }
-#endif
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
+
+    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
+    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
+        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
     }
+#endif
 
     return !success;
 }
 
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
-{
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
-}
-
 uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                               uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
-{
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
     bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
 
-    /* high and low need to be switched here because this is not actually a
-     * 128bit store but two doublewords stored consecutively
-     */
-    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-    newv = int128_make128(new_hi, new_lo);
+    assert(HAVE_CMPXCHG128);
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
-#ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
-
-        helper_retaddr = ra;
-        o1 = ldq_be_p(haddr + 0);
-        o0 = ldq_be_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_be_p(haddr + 0, int128_gethi(newv));
-            stq_be_p(haddr + 1, int128_getlo(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
-
-        o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
-        o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
-            helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
-        }
-#endif
-    }
+    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    newv = int128_make128(new_lo, new_hi);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
+    success = int128_eq(oldv, cmpv);
     return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                      uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
+    bool success;
+
+#ifdef CONFIG_USER_ONLY
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o1 = ldq_be_p(haddr + 0);
+    o0 = ldq_be_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_be_p(haddr + 0, int128_gethi(newv));
+        stq_be_p(haddr + 1, int128_getlo(newv));
+    }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
+
+    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
+    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
+        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
+    }
+#endif
+
+    return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
-                                              uint64_t new_lo, uint64_t new_hi)
+                                              uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
+    Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+    newv = int128_make128(new_hi, new_lo);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    success = int128_eq(oldv, cmpv);
     return !success;
 }
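With the runtime CONFIG_ATOMIC128 test gone, choosing between the serial and _parallel helpers stays at translate time, and the parallel path must now be guarded by HAVE_CMPXCHG128 since the helpers assert it. A sketch of the dispatch shape (abbreviated from the store-exclusive path in translate-a64.c; tmp/addr/lo/hi stand in for the real TCG temporaries):

    /* Sketch: translate-time dispatch for a paired store-exclusive. */
    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
        if (!HAVE_CMPXCHG128) {
            /* No host 16-byte cmpxchg: retry serialized. */
            gen_helper_exit_atomic(cpu_env);
        } else if (s->be_data == MO_LE) {
            gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, addr, lo, hi);
        } else {
            gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, addr, lo, hi);
        }
    } else if (s->be_data == MO_LE) {
        gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, lo, hi);
    } else {
        gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, lo, hi);
    }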
 /* Writes back the old data into Rs.  */
 void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_lo, uint64_t new_hi)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs] = int128_getlo(oldv);
     env->xregs[rs + 1] = int128_gethi(oldv);
-#endif
 }
 
 void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_hi, uint64_t new_lo)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs + 1] = int128_getlo(oldv);
     env->xregs[rs] = int128_gethi(oldv);
-#endif
 }
 
 /*
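As the comment above says, the CASP helpers always write the old memory value back to the Rs register pair, whether or not the comparison matched; the guest detects success by comparing Rs afterwards. Architectural pseudo-C, independent of the QEMU helpers:

    /*
     * Pseudo-C for CASP Xs, X(s+1), Xt, X(t+1), [Xn]: compare-and-swap a
     * 128-bit pair; the Rs pair always receives the old memory contents.
     */
    typedef struct { uint64_t lo, hi; } pair128;

    static void casp(pair128 *mem, pair128 *rs, const pair128 *rt)
    {
        pair128 old = *mem;                  /* single-copy atomic in HW */

        if (old.lo == rs->lo && old.hi == rs->hi) {
            *mem = *rt;                      /* swap on match */
        }
        *rs = old;                           /* unconditional write-back */
    }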
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 023952a9a4..9e79182ab4 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1128,20 +1128,35 @@ DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ld1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ld1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1150,13 +1165,21 @@ DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1166,17 +1189,28 @@ DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1186,218 +1220,357 @@ DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr,
tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
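
Each _le/_be pair declared here can be produced from a single macro body that varies only in the memory primitive, so the predication loop is written once per element size. A hedged sketch of that expansion pattern, with invented names (the real store macros in sve_helper.c are more involved and go through the guest-memory access layer):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    static inline void st16_le(uint8_t *p, uint16_t v)
    {
        uint8_t b[2] = { (uint8_t)v, (uint8_t)(v >> 8) };
        memcpy(p, b, 2);
    }

    static inline void st16_be(uint8_t *p, uint16_t v)
    {
        uint8_t b[2] = { (uint8_t)(v >> 8), (uint8_t)v };
        memcpy(p, b, 2);
    }

    /* One body, two endian instantiations, mirroring how an
     * st1hh_le_r/st1hh_be_r pair can share everything except
     * the store primitive. */
    #define DO_ST1_SKETCH(NAME, STORE)                            \
        static void NAME(const uint16_t *zd, const bool *pg,      \
                         uint8_t *mem, int nelem)                 \
        {                                                         \
            for (int i = 0; i < nelem; i++) {                     \
                if (pg[i]) {                                      \
                    STORE(mem + 2 * i, zd[i]);                    \
                }                                                 \
            }                                                     \
        }

    DO_ST1_SKETCH(sketch_st1hh_le, st16_le)
    DO_ST1_SKETCH(sketch_st1hh_be, st16_be)
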
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1hd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_st1sd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, 
TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, 
void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhsu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffssu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhsu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffssu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG, void, 
env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffddu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffhds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldffsds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) 
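
The sve_st*_zsu/_zss/_zd helpers are scatter stores: each active element's address is the scalar base plus that element's vector offset, and the suffix names how the offsets are held (32-bit zero-extended, 32-bit sign-extended, or full 64-bit). A minimal model of the addressing, with invented names:

    #include <stdbool.h>
    #include <stdint.h>

    typedef void store16_fn(uint64_t addr, uint16_t val);

    /* "zsu" flavor: 32-bit unsigned offsets, zero-extended to
     * 64 bits before being added to the base address. */
    static void model_sth_zsu(const uint32_t *data, const uint32_t *offs,
                              const bool *pg, uint64_t base, int nelem,
                              store16_fn *store16)
    {
        for (int i = 0; i < nelem; i++) {
            if (pg[i]) {
                store16(base + (uint64_t)offs[i], (uint16_t)data[i]);
            }
        }
    }
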
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) diff --git a/target/arm/helper.c b/target/arm/helper.c index 64b1564594..0ea95b0815 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -56,6 +56,8 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address, V8M_SAttributes *sattrs); #endif +static void switch_mode(CPUARMState *env, int mode); + static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) { int nregs; @@ -552,12 +554,61 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +/* IS variants of TLB operations must affect all cores */ +static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = ENV_GET_CPU(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +/* + * Non-IS variants of TLB operations are upgraded to + * IS versions if we are at NS EL1 and HCR_EL2.FB is set to + * force broadcast of these operations. 
+ */
+static bool tlb_force_broadcast(CPUARMState *env)
+{
+    return (env->cp15.hcr_el2 & HCR_FB) &&
+        arm_current_el(env) == 1 && !arm_is_secure_below_el3(env);
+}
+
 static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
 {
     /* Invalidate all (TLBIALL) */
     ARMCPU *cpu = arm_env_get_cpu(env);
 
+    if (tlb_force_broadcast(env)) {
+        tlbiall_is_write(env, NULL, value);
+        return;
+    }
+
     tlb_flush(CPU(cpu));
 }
@@ -567,6 +618,11 @@ static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
     /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
     ARMCPU *cpu = arm_env_get_cpu(env);
 
+    if (tlb_force_broadcast(env)) {
+        tlbimva_is_write(env, NULL, value);
+        return;
+    }
+
     tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
 }
@@ -576,6 +632,11 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
     /* Invalidate by ASID (TLBIASID) */
     ARMCPU *cpu = arm_env_get_cpu(env);
 
+    if (tlb_force_broadcast(env)) {
+        tlbiasid_is_write(env, NULL, value);
+        return;
+    }
+
     tlb_flush(CPU(cpu));
 }
@@ -585,40 +646,12 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
     /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
-
-/* IS variants of TLB operations must affect all cores */
-static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
-}
-
-static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
+    if (tlb_force_broadcast(env)) {
+        tlbimvaa_is_write(env, NULL, value);
+        return;
+    }
 
-    tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
+    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
 }
 
 static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1179,6 +1212,7 @@ static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
 {
+    value &= pmu_counter_mask(env);
     env->cp15.c9_pmovsr &= ~value;
 }
@@ -1295,12 +1329,26 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
     CPUState *cs = ENV_GET_CPU(env);
     uint64_t ret = 0;
 
-    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
-        ret |= CPSR_I;
+    if (arm_hcr_el2_imo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+            ret |= CPSR_I;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+            ret |= CPSR_I;
+        }
     }
-    if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
-        ret |= CPSR_F;
+
+    if (arm_hcr_el2_fmo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+            ret |= CPSR_F;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+            ret |= CPSR_F;
+        }
     }
+
     /* External aborts are not possible in QEMU so A bit is always clear */
     return ret;
 }
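
The isr_read() change above makes ISR reads report the virtual pending state (VIRQ/VFIQ) rather than the physical one whenever HCR_EL2.IMO or HCR_EL2.FMO routes that interrupt class to EL2. A compact model of the selection logic, with invented names:

    #include <stdbool.h>
    #include <stdint.h>

    #define MODEL_CPSR_I (1u << 7)
    #define MODEL_CPSR_F (1u << 6)

    /* imo/fmo mirror HCR_EL2.IMO/FMO; the booleans mirror the
     * CPU_INTERRUPT_* flags consulted by isr_read() above. */
    static uint32_t model_isr_read(bool imo, bool fmo,
                                   bool irq, bool virq,
                                   bool fiq, bool vfiq)
    {
        uint32_t ret = 0;

        if (imo ? virq : irq) {
            ret |= MODEL_CPSR_I;
        }
        if (fmo ? vfiq : fiq) {
            ret |= MODEL_CPSR_F;
        }
        return ret;
    }
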
@@ -1423,12 +1471,14 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
       .writefn = pmintenset_write, .raw_writefn = raw_write,
       .resetvalue = 0x0 },
     { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .writefn = pmintenclr_write, },
     { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .writefn = pmintenclr_write },
     { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
@@ -2267,13 +2317,15 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
      * * The Non-secure TTBCR.EAE bit is set to 1
      * * The implementation includes EL2, and the value of HCR.VM is 1
      *
+     * (Note that HCR.DC makes HCR.VM behave as if it is 1.)
+     *
      * ATS1Hx always uses the 64bit format (not supported yet).
      */
     format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
 
     if (arm_feature(env, ARM_FEATURE_EL2)) {
         if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-            format64 |= env->cp15.hcr_el2 & HCR_VM;
+            format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC);
         } else {
             format64 |= arm_current_el(env) == 2;
         }
@@ -2706,12 +2758,10 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    /* 64 bit accesses to the TTBRs can change the ASID and so we
-     * must flush the TLB.
-     */
-    if (cpreg_field_is_64bit(ri)) {
+    /* If the ASID changes (with a 64-bit write), we must flush the TLB. */
+    if (cpreg_field_is_64bit(ri) &&
+        extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
         ARMCPU *cpu = arm_env_get_cpu(env);
-
         tlb_flush(CPU(cpu));
     }
     raw_write(env, ri, value);
@@ -3080,22 +3130,6 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
  * Page D4-1736 (DDI0487A.b)
  */
 
-static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                    uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    if (arm_is_secure_below_el3(env)) {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S1SE1 |
-                            ARMMMUIdxBit_S1SE0);
-    } else {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S12NSE1 |
-                            ARMMMUIdxBit_S12NSE0);
-    }
-}
-
 static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                       uint64_t value)
 {
@@ -3113,6 +3147,27 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
     }
 }
 
+static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                    uint64_t value)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+
+    if (tlb_force_broadcast(env)) {
+        tlbi_aa64_vmalle1is_write(env, NULL, value);
+        return;
+    }
+
+    if (arm_is_secure_below_el3(env)) {
+        tlb_flush_by_mmuidx(cs,
+                            ARMMMUIdxBit_S1SE1 |
+                            ARMMMUIdxBit_S1SE0);
+    } else {
+        tlb_flush_by_mmuidx(cs,
+                            ARMMMUIdxBit_S12NSE1 |
+                            ARMMMUIdxBit_S12NSE0);
+    }
+}
+
 static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                   uint64_t value)
 {
@@ -3202,29 +3257,6 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3);
 }
 
-static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                 uint64_t value)
-{
-    /* Invalidate by VA, EL1&0 (AArch64 version).
-     * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
-     * since we don't support flush-for-specific-ASID-only or
-     * flush-last-level-only.
- */ - ARMCPU *cpu = arm_env_get_cpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - if (arm_is_secure_below_el3(env)) { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S1SE1 | - ARMMMUIdxBit_S1SE0); - } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, - ARMMMUIdxBit_S12NSE1 | - ARMMMUIdxBit_S12NSE0); - } -} - static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3272,6 +3304,34 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, EL1&0 (AArch64 version). + * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlbi_aa64_vae1is_write(env, NULL, value); + return; + } + + if (arm_is_secure_below_el3(env)) { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S1SE1 | + ARMMMUIdxBit_S1SE0); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_S12NSE1 | + ARMMMUIdxBit_S12NSE0); + } +} + static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3869,6 +3929,7 @@ static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = { static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = ENV_GET_CPU(env); uint64_t valid_mask = HCR_MASK; if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -3887,6 +3948,28 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear RES0 bits. */ value &= valid_mask; + /* + * VI and VF are kept in cs->interrupt_request. Modifying that + * requires that we have the iothread lock, which is done by + * marking the reginfo structs as ARM_CP_IO. + * Note that if a write to HCR pends a VIRQ or VFIQ it is never + * possible for it to be taken immediately, because VIRQ and + * VFIQ are masked unless running at EL0 or EL1, and HCR + * can only be written at EL2. 
+ */ + g_assert(qemu_mutex_iothread_locked()); + if (value & HCR_VI) { + cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + } + if (value & HCR_VF) { + cs->interrupt_request |= CPU_INTERRUPT_VFIQ; + } else { + cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ; + } + value &= ~(HCR_VI | HCR_VF); + /* These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups @@ -3914,16 +3997,32 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, hcr_write(env, NULL, value); } +static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* The VI and VF bits live in cs->interrupt_request */ + uint64_t ret = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF); + CPUState *cs = ENV_GET_CPU(env); + + if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + ret |= HCR_VI; + } + if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + ret |= HCR_VF; + } + return ret; +} + static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .readfn = hcr_read }, { .name = "HCR", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_writelow }, + .writefn = hcr_writelow, .readfn = hcr_read }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, @@ -4160,7 +4259,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { static const ARMCPRegInfo el2_v8_cp_reginfo[] = { { .name = "HCR2", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2), @@ -4211,7 +4310,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.mvbar) }, { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .access = PL3_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) }, { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, @@ -4400,78 +4499,105 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { REGINFO_SENTINEL }; -/* Return the exception level to which SVE-disabled exceptions should - * be taken, or 0 if SVE is enabled. +/* Return the exception level to which exceptions should be taken + * via SVEAccessTrap. If an exception should be routed through + * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should + * take care of raising that exception. + * C.f. the ARM pseudocode function CheckSVEEnabled. 
*/ -static int sve_exception_el(CPUARMState *env) +int sve_exception_el(CPUARMState *env, int el) { #ifndef CONFIG_USER_ONLY - unsigned current_el = arm_current_el(env); + if (el <= 1) { + bool disabled = false; - /* The CPACR.ZEN controls traps to EL1: - * 0, 2 : trap EL0 and EL1 accesses - * 1 : trap only EL0 accesses - * 3 : trap no accesses - */ - switch (extract32(env->cp15.cpacr_el1, 16, 2)) { - default: - if (current_el <= 1) { - /* Trap to PL1, which might be EL1 or EL3 */ - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; + /* The CPACR.ZEN controls traps to EL1: + * 0, 2 : trap EL0 and EL1 accesses + * 1 : trap only EL0 accesses + * 3 : trap no accesses + */ + if (!extract32(env->cp15.cpacr_el1, 16, 1)) { + disabled = true; + } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) { + disabled = el == 0; } - break; - case 1: - if (current_el == 0) { - return 1; + if (disabled) { + /* route_to_el2 */ + return (arm_feature(env, ARM_FEATURE_EL2) + && !arm_is_secure(env) + && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1); } - break; - case 3: - break; - } - /* Similarly for CPACR.FPEN, after having checked ZEN. */ - switch (extract32(env->cp15.cpacr_el1, 20, 2)) { - default: - if (current_el <= 1) { - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; + /* Check CPACR.FPEN. */ + if (!extract32(env->cp15.cpacr_el1, 20, 1)) { + disabled = true; + } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) { + disabled = el == 0; } - break; - case 1: - if (current_el == 0) { - return 1; + if (disabled) { + return 0; } - break; - case 3: - break; } - /* CPTR_EL2. Check both TZ and TFP. */ - if (current_el <= 2 - && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ)) - && !arm_is_secure_below_el3(env)) { - return 2; + /* CPTR_EL2. Since TZ and TFP are positive, + * they will be zero when EL2 is not present. + */ + if (el <= 2 && !arm_is_secure_below_el3(env)) { + if (env->cp15.cptr_el[2] & CPTR_TZ) { + return 2; + } + if (env->cp15.cptr_el[2] & CPTR_TFP) { + return 0; + } } - /* CPTR_EL3. Check both EZ and TFP. */ - if (!(env->cp15.cptr_el[3] & CPTR_EZ) - || (env->cp15.cptr_el[3] & CPTR_TFP)) { + /* CPTR_EL3. Since EZ is negative we must check for EL3. */ + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.cptr_el[3] & CPTR_EZ)) { return 3; } #endif return 0; } +/* + * Given that SVE is enabled, return the vector length for EL. + */ +uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint32_t zcr_len = cpu->sve_max_vq - 1; + + if (el <= 1) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); + } + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); + } + if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); + } + return zcr_len; +} + static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + int cur_el = arm_current_el(env); + int old_len = sve_zcr_len_for_el(env, cur_el); + int new_len; + /* Bits other than [3:0] are RAZ/WI. */ raw_write(env, ri, value & 0xf); + + /* + * Because we arrived here, we know both FP and SVE are enabled; + * otherwise we would have trapped access to the ZCR_ELn register. 
+ */ + new_len = sve_zcr_len_for_el(env, cur_el); + if (new_len < old_len) { + aarch64_sve_narrow_vq(env, new_len + 1); + } } static const ARMCPRegInfo zcr_el1_reginfo = { @@ -4843,7 +4969,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = arm_env_get_cpu(env); - uint64_t pfr0 = cpu->id_aa64pfr0; + uint64_t pfr0 = cpu->isar.id_aa64pfr0; if (env->gicv3state) { pfr0 |= 1 << 24; @@ -4910,27 +5036,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar0 }, + .resetvalue = cpu->isar.id_isar0 }, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar1 }, + .resetvalue = cpu->isar.id_isar1 }, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar2 }, + .resetvalue = cpu->isar.id_isar2 }, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar3 }, + .resetvalue = cpu->isar.id_isar3 }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar4 }, + .resetvalue = cpu->isar.id_isar4 }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar5 }, + .resetvalue = cpu->isar.id_isar5 }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -4938,7 +5064,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_isar6 }, + .resetvalue = cpu->isar.id_isar6 }, REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v6_idregs); @@ -5009,7 +5135,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64pfr1}, + .resetvalue = cpu->isar.id_aa64pfr1}, { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5018,9 +5144,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "ID_AA64PFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, + /* At present, only SVEver == 0 is defined anyway. 
*/ .resetvalue = 0 }, { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, @@ -5069,11 +5196,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar0 }, + .resetvalue = cpu->isar.id_aa64isar0 }, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->id_aa64isar1 }, + .resetvalue = cpu->isar.id_aa64isar1 }, { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -5133,15 +5260,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr0 }, + .resetvalue = cpu->isar.mvfr0 }, { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr1 }, + .resetvalue = cpu->isar.mvfr1 }, { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, - .resetvalue = cpu->mvfr2 }, + .resetvalue = cpu->isar.mvfr2 }, { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -5587,7 +5714,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &sctlr); } - if (arm_feature(env, ARM_FEATURE_SVE)) { + if (cpu_isar_feature(aa64_sve, cpu)) { define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); if (arm_feature(env, ARM_FEATURE_EL2)) { define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); @@ -6177,7 +6304,17 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, mask |= CPSR_IL; val |= CPSR_IL; } + qemu_log_mask(LOG_GUEST_ERROR, + "Illegal AArch32 mode switch attempt from %s to %s\n", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val)); } else { + qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n", + write_type == CPSRWriteExceptionReturn ? 
+ "Exception return from AArch32" : + "AArch32 mode switch from", + aarch32_mode_name(env->uncached_cpsr), + aarch32_mode_name(val), env->regs[15]); switch_mode(env, val & CPSR_M); } } @@ -6275,7 +6412,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) return 0; } -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { ARMCPU *cpu = arm_env_get_cpu(env); @@ -6297,7 +6434,7 @@ void aarch64_sync_64_to_32(CPUARMState *env) #else -void switch_mode(CPUARMState *env, int mode) +static void switch_mode(CPUARMState *env, int mode) { int old_mode; int i; @@ -6441,7 +6578,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, target_ulong page_size; hwaddr physaddr; int prot; - ARMMMUFaultInfo fi; + ARMMMUFaultInfo fi = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; @@ -6503,7 +6640,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, target_ulong page_size; hwaddr physaddr; int prot; - ARMMMUFaultInfo fi; + ARMMMUFaultInfo fi = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; @@ -6554,18 +6691,6 @@ pend_fault: return false; } -/* Return true if we're using the process stack pointer (not the MSP) */ -static bool v7m_using_psp(CPUARMState *env) -{ - /* Handler mode always uses the main stack; for thread mode - * the CONTROL.SPSEL bit determines the answer. - * Note that in v7M it is not possible to be in Handler mode with - * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both. - */ - return !arm_v7m_is_handler_mode(env) && - env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK; -} - /* Write to v7M CONTROL.SPSEL bit for the specified security bank. * This may change the current stack pointer between Main and Process * stack pointers if it is done for the CONTROL register for the current @@ -6722,6 +6847,10 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) "BLXNS with misaligned SP is UNPREDICTABLE\n"); } + if (sp < v7m_sp_limit(env)) { + raise_exception(env, EXCP_STKOF, 0, 1); + } + saved_psr = env->v7m.exception; if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) { saved_psr |= XPSR_SFPA; @@ -6851,6 +6980,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, uint32_t frameptr; ARMMMUIdx mmu_idx; bool stacked_ok; + uint32_t limit; + bool want_psp; if (dotailchain) { bool mode = lr & R_V7M_EXCRET_MODE_MASK; @@ -6860,12 +6991,34 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv); frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode, lr & R_V7M_EXCRET_SPSEL_MASK); + want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK); + if (want_psp) { + limit = env->v7m.psplim[M_REG_S]; + } else { + limit = env->v7m.msplim[M_REG_S]; + } } else { mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); frame_sp_p = &env->regs[13]; + limit = v7m_sp_limit(env); } frameptr = *frame_sp_p - 0x28; + if (frameptr < limit) { + /* + * Stack limit failure: set SP to the limit value, and generate + * STKOF UsageFault. Stack pushes below the limit must not be + * performed. It is IMPDEF whether pushes above the limit are + * performed; we choose not to. 
+ */ + qemu_log_mask(CPU_LOG_INT, + "...STKOF during callee-saves register stacking\n"); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, + env->v7m.secure); + *frame_sp_p = limit; + return true; + } /* Write as much of the stack frame as we can. A write failure may * cause us to pend a derived exception. @@ -6889,10 +7042,7 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx, ignore_faults); - /* Update SP regardless of whether any of the stack accesses failed. - * When we implement v8M stack limit checking then this attempt to - * update SP might also fail and result in a derived exception. - */ + /* Update SP regardless of whether any of the stack accesses failed. */ *frame_sp_p = frameptr; return !stacked_ok; @@ -6938,7 +7088,7 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain, * not already saved. */ if (lr & R_V7M_EXCRET_DCRS_MASK && - !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) { + !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) { push_failed = v7m_push_callee_stack(cpu, lr, dotailchain, ignore_stackfaults); } @@ -7040,6 +7190,26 @@ static bool v7m_push_stack(ARMCPU *cpu) frameptr -= 0x20; + if (arm_feature(env, ARM_FEATURE_V8)) { + uint32_t limit = v7m_sp_limit(env); + + if (frameptr < limit) { + /* + * Stack limit failure: set SP to the limit value, and generate + * STKOF UsageFault. Stack pushes below the limit must not be + * performed. It is IMPDEF whether pushes above the limit are + * performed; we choose not to. + */ + qemu_log_mask(CPU_LOG_INT, + "...STKOF during stacking\n"); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, + env->v7m.secure); + env->regs[13] = limit; + return true; + } + } + /* Write as much of the stack frame as we can. If we fail a stack * write this will result in a derived exception being pended * (which may be taken in preference to the one we started with @@ -7055,10 +7225,7 @@ static bool v7m_push_stack(ARMCPU *cpu) v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) && v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false); - /* Update SP regardless of whether any of the stack accesses failed. - * When we implement v8M stack limit checking then this attempt to - * update SP might also fail and result in a derived exception. - */ + /* Update SP regardless of whether any of the stack accesses failed. 
*/ env->regs[13] = frameptr; return !stacked_ok; @@ -7304,7 +7471,6 @@ static void do_v7m_exception_exit(ARMCPU *cpu) pop_ok = pop_ok && v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && - v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) && v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) && v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) && @@ -7512,6 +7678,7 @@ static void arm_log_exception(int idx) [EXCP_SEMIHOST] = "Semihosting call", [EXCP_NOCP] = "v7M NOCP UsageFault", [EXCP_INVSTATE] = "v7M INVSTATE UsageFault", + [EXCP_STKOF] = "v8M STKOF UsageFault", }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { @@ -7667,6 +7834,10 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK; break; + case EXCP_STKOF: + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); + env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK; + break; case EXCP_SWI: /* The PC already points to the next instruction. */ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure); @@ -8129,6 +8300,19 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) } if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) { + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* + * QEMU syndrome values are v8-style. v7 has the IL bit + * UNK/SBZP for "field not valid" cases, where v8 uses RES1. + * If this is a v7 CPU, squash the IL bit in those cases. + */ + if (cs->exception_index == EXCP_PREFETCH_ABORT || + (cs->exception_index == EXCP_DATA_ABORT && + !(env->exception.syndrome & ARM_EL_ISV)) || + syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) { + env->exception.syndrome &= ~ARM_EL_IL; + } + } env->cp15.esr_el[2] = env->exception.syndrome; } @@ -8163,7 +8347,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) uint32_t moe; /* If this is a debug exception we must update the DBGDSCR.MOE bits */ - switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) { + switch (syn_get_ec(env->exception.syndrome)) { case EC_BREAKPOINT: case EC_BREAKPOINT_SAME_EL: moe = 1; @@ -8310,8 +8494,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) unsigned int new_el = env->exception.target_el; target_ulong addr = env->cp15.vbar_el[new_el]; unsigned int new_mode = aarch64_pstate_mode(new_el, true); + unsigned int cur_el = arm_current_el(env); + + /* + * Note that new_el can never be 0. If cur_el is 0, then + * el0_a64 is is_a64(), else el0_a64 is ignored. + */ + aarch64_sve_change_el(env, cur_el, new_el, is_a64(env)); - if (arm_current_el(env) < new_el) { + if (cur_el < new_el) { /* Entry vector offset depends on whether the implemented EL * immediately lower than the target level is using AArch32 or AArch64 */ @@ -8353,6 +8544,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: + if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + /* + * QEMU internal FP/SIMD syndromes from AArch32 include the + * TA and coproc fields which are only exposed if the exception + * is taken to AArch32 Hyp mode. Mask them out to get a valid + * AArch64 format syndrome. 
+ */ + env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + } env->cp15.esr_el[new_el] = env->exception.syndrome; break; case EXCP_IRQ: @@ -8496,7 +8696,7 @@ void arm_cpu_do_interrupt(CPUState *cs) if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", - env->exception.syndrome >> ARM_EL_EC_SHIFT, + syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -8593,7 +8793,8 @@ static inline bool regime_translation_disabled(CPUARMState *env, } if (mmu_idx == ARMMMUIdx_S2NS) { - return (env->cp15.hcr_el2 & HCR_VM) == 0; + /* HCR.DC means HCR.VM behaves as 1 */ + return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0; } if (env->cp15.hcr_el2 & HCR_TGE) { @@ -8603,6 +8804,12 @@ static inline bool regime_translation_disabled(CPUARMState *env, } } + if ((env->cp15.hcr_el2 & HCR_DC) && + (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) { + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ + return true; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } @@ -8954,9 +9161,20 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, hwaddr s2pa; int s2prot; int ret; + ARMCacheAttrs cacheattrs = {}; + ARMCacheAttrs *pcacheattrs = NULL; + + if (env->cp15.hcr_el2 & HCR_PTW) { + /* + * PTW means we must fault if this S1 walk touches S2 Device + * memory; otherwise we don't care about the attributes and can + * save the S2 translation the effort of computing them. + */ + pcacheattrs = &cacheattrs; + } ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa, - &txattrs, &s2prot, &s2size, fi, NULL); + &txattrs, &s2prot, &s2size, fi, pcacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -8964,6 +9182,14 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } + if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { + /* Access was to Device memory: generate Permission fault */ + fi->type = ARMFault_Permission; + fi->s2addr = addr; + fi->stage2 = true; + fi->s1ptw = true; + return ~0; + } addr = s2pa; } return addr; @@ -10583,6 +10809,16 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address, /* Combine the S1 and S2 cache attributes, if needed */ if (!ret && cacheattrs != NULL) { + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + */ + cacheattrs->attrs = 0xff; + cacheattrs->shareability = 0; + } *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); } @@ -10929,11 +11165,23 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val) * currently in handler mode or not, using the NS CONTROL.SPSEL. */ bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK; + bool is_psp = !arm_v7m_is_handler_mode(env) && spsel; + uint32_t limit; if (!env->v7m.secure) { return; } - if (!arm_v7m_is_handler_mode(env) && spsel) { + + limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false]; + + if (val < limit) { + CPUState *cs = CPU(arm_env_get_cpu(env)); + + cpu_restore_state(cs, GETPC(), true); + raise_exception(env, EXCP_STKOF, 0, 1); + } + + if (is_psp) { env->v7m.other_ss_psp = val; } else { env->v7m.other_ss_msp = val; @@ -11528,7 +11776,7 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) uint32_t changed; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. 
*/ - if (!arm_feature(env, ARM_FEATURE_V8_FP16)) { + if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { val &= ~FPCR_FZ16; } @@ -12516,11 +12764,10 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Return the exception level to which FP-disabled exceptions should * be taken, or 0 if FP is enabled. */ -static inline int fp_exception_el(CPUARMState *env) +int fp_exception_el(CPUARMState *env, int cur_el) { #ifndef CONFIG_USER_ONLY int fpen; - int cur_el = arm_current_el(env); /* CPACR and the CPTR registers don't exist before v6, so FP is * always accessible @@ -12583,18 +12830,21 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *pflags) { ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); - int fp_el = fp_exception_el(env); + int current_el = arm_current_el(env); + int fp_el = fp_exception_el(env, current_el); uint32_t flags; if (is_a64(env)) { + ARMCPU *cpu = arm_env_get_cpu(env); + *pc = env->pc; flags = ARM_TBFLAG_AARCH64_STATE_MASK; /* Get control bits for tagged addresses */ flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); - if (arm_feature(env, ARM_FEATURE_SVE)) { - int sve_el = sve_exception_el(env); + if (cpu_isar_feature(aa64_sve, cpu)) { + int sve_el = sve_exception_el(env, current_el); uint32_t zcr_len; /* If SVE is disabled, but FP is enabled, @@ -12603,19 +12853,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (sve_el != 0 && fp_el == 0) { zcr_len = 0; } else { - int current_el = arm_current_el(env); - ARMCPU *cpu = arm_env_get_cpu(env); - - zcr_len = cpu->sve_max_vq - 1; - if (current_el <= 1) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); - } - if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); - } - if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); - } + zcr_len = sve_zcr_len_for_el(env, current_el); } flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT; flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT; @@ -12668,6 +12906,103 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, flags |= ARM_TBFLAG_HANDLER_MASK; } + /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is + * suppressing them because the requested execution priority is less than 0. + */ + if (arm_feature(env, ARM_FEATURE_V8) && + arm_feature(env, ARM_FEATURE_M) && + !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && + (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) { + flags |= ARM_TBFLAG_STACKCHECK_MASK; + } + *pflags = flags; *cs_base = 0; } + +#ifdef TARGET_AARCH64 +/* + * The manual says that when SVE is enabled and VQ is widened the + * implementation is allowed to zero the previously inaccessible + * portion of the registers. The corollary to that is that when + * SVE is enabled and VQ is narrowed we are also allowed to zero + * the now inaccessible portion of the registers. + * + * The intent of this is that no predicate bit beyond VQ is ever set. + * Which means that some operations on predicate registers themselves + * may operate on full uint64_t or even unrolled across the maximum + * uint64_t[4]. Performing 4 bits of host arithmetic unconditionally + * may well be cheaper than conditionals to restrict the operation + * to the relevant portion of a uint16_t[16]. 
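+ *
+ * As a worked example, narrowing to vq == 5 below keeps zregs[i].d[0]
+ * through d[9], zeroes d[10] through d[31], leaves pregs[i].p[0]
+ * intact, keeps only the low 16 bits (one vq's worth of predicate
+ * bits) of p[1], and zeroes p[2] and p[3].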
+ */ +void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) +{ + int i, j; + uint64_t pmask; + + assert(vq >= 1 && vq <= ARM_MAX_VQ); + assert(vq <= arm_env_get_cpu(env)->sve_max_vq); + + /* Zap the high bits of the zregs. */ + for (i = 0; i < 32; i++) { + memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq)); + } + + /* Zap the high bits of the pregs and ffr. */ + pmask = 0; + if (vq & 3) { + pmask = ~(-1ULL << (16 * (vq & 3))); + } + for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) { + for (i = 0; i < 17; ++i) { + env->vfp.pregs[i].p[j] &= pmask; + } + pmask = 0; + } +} + +/* + * Notice a change in SVE vector size when changing EL. + */ +void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int old_len, new_len; + bool old_a64, new_a64; + + /* Nothing to do if no SVE. */ + if (!cpu_isar_feature(aa64_sve, cpu)) { + return; + } + + /* Nothing to do if FP is disabled in either EL. */ + if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) { + return; + } + + /* + * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped + * at ELx, or not available because the EL is in AArch32 state, then + * for all purposes other than a direct read, the ZCR_ELx.LEN field + * has an effective value of 0". + * + * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0). + * If we ignore aa32 state, we would fail to see the vq4->vq0 transition + * from EL2->EL1. Thus we go ahead and narrow when entering aa32 so that + * we already have the correct register contents when encountering the + * vq0->vq0 transition between EL0->EL1. + */ + old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; + old_len = (old_a64 && !sve_exception_el(env, old_el) + ? sve_zcr_len_for_el(env, old_el) : 0); + new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; + new_len = (new_a64 && !sve_exception_el(env, new_el) + ? sve_zcr_len_for_el(env, new_el) : 0); + + /* When changing vector length, clear inaccessible state. */ + if (new_len < old_len) { + aarch64_sve_narrow_vq(env, new_len + 1); + } +} +#endif diff --git a/target/arm/helper.h b/target/arm/helper.h index 59e8c3bd1b..8c9590091b 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -69,6 +69,8 @@ DEF_HELPER_2(v7m_blxns, void, env, i32) DEF_HELPER_3(v7m_tt, i32, env, i32, i32) +DEF_HELPER_2(v8m_stackcheck, void, env, i32) + DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32) DEF_HELPER_3(set_cp_reg, void, env, ptr, i32) DEF_HELPER_2(get_cp_reg, i32, env, ptr) diff --git a/target/arm/internals.h b/target/arm/internals.h index dc9357766c..6c2bb2deeb 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -94,6 +94,15 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */ #define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */ #define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */ +/** + * raise_exception: Raise the specified exception. + * Raise a guest exception with the specified value, syndrome register + * and target exception level. This should be called from helper functions, + * and never returns because we will longjump back up to the CPU main loop. + */ +void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el); + /* * For AArch64, map a given EL to an index in the banked_spsr array. 
* Note that this mapping and the AArch32 mapping defined in bank_number() @@ -136,7 +145,6 @@ static inline int bank_number(int mode) g_assert_not_reached(); } -void switch_mode(CPUARMState *, int); void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); @@ -270,14 +278,19 @@ enum arm_exception_class { #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT) +static inline uint32_t syn_get_ec(uint32_t syn) +{ + return syn >> ARM_EL_EC_SHIFT; +} + /* Utility functions for constructing various kinds of syndrome value. * Note that in general we follow the AArch64 syndrome values; in a * few cases the value in HSR for exceptions taken to AArch32 Hyp - * mode differs slightly, so if we ever implemented Hyp mode then the - * syndrome value would need some massaging on exception entry. - * (One example of this is that AArch64 defaults to IL bit set for - * exceptions which don't specifically indicate information about the - * trapping instruction, whereas AArch32 defaults to IL bit clear.) + * mode differs slightly, and we fix this up when populating HSR in + * arm_cpu_do_interrupt_aarch32_hyp(). + * The exception is FP/SIMD access traps -- these report extra information + * when taking an exception to AArch32. For those we include the extra coproc + * and TA fields, and mask them out when taking the exception to AArch64. */ static inline uint32_t syn_uncategorized(void) { @@ -377,9 +390,18 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit) { + /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) | (is_16bit ? 0 : ARM_EL_IL) - | (cv << 24) | (cond << 20); + | (cv << 24) | (cond << 20) | 0xa; +} + +static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) +{ + /* AArch32 SIMD trap: TA == 1 coproc == 0 */ + return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) + | (is_16bit ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (1 << 5); } static inline uint32_t syn_sve_access_trap(void) @@ -796,4 +818,57 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) } } +/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. + * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. + */ +#define MEMOPIDX_SHIFT 8 + +/** + * v7m_using_psp: Return true if using process stack pointer + * Return true if the CPU is currently using the process stack + * pointer, or false if it is using the main stack pointer. + */ +static inline bool v7m_using_psp(CPUARMState *env) +{ + /* Handler mode always uses the main stack; for thread mode + * the CONTROL.SPSEL bit determines the answer. + * Note that in v7M it is not possible to be in Handler mode with + * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both. + */ + return !arm_v7m_is_handler_mode(env) && + env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK; +} + +/** + * v7m_sp_limit: Return SP limit for current CPU state + * Return the SP limit value for the current CPU security state + * and stack pointer. 
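+ *
+ * A typical use, as in HELPER(v8m_stackcheck) below, is to fault when
+ * a proposed new SP value would run below the limit:
+ *     if (newvalue < v7m_sp_limit(env)) {
+ *         ... raise EXCP_STKOF ...
+ *     }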
+ */ +static inline uint32_t v7m_sp_limit(CPUARMState *env) +{ + if (v7m_using_psp(env)) { + return env->v7m.psplim[env->v7m.secure]; + } else { + return env->v7m.msplim[env->v7m.secure]; + } +} + +/** + * aarch32_mode_name(): Return name of the AArch32 CPU mode + * @psr: Program Status Register indicating CPU mode + * + * Returns, for debug logging purposes, a printable representation + * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by + * the low bits of the specified PSR. + */ +static inline const char *aarch32_mode_name(uint32_t psr) +{ + static const char cpu_mode_names[16][4] = { + "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", + "???", "???", "hyp", "und", "???", "???", "???", "sys" + }; + + return cpu_mode_names[psr & 0xf]; +} + #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 65f867d569..09a86e2820 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -34,6 +34,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { }; static bool cap_has_mp_state; +static bool cap_has_inject_serror_esr; static ARMHostCPUFeatures arm_host_cpu_features; @@ -48,6 +49,12 @@ int kvm_arm_vcpu_init(CPUState *cs) return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); } +void kvm_arm_init_serror_injection(CPUState *cs) +{ + cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state, + KVM_CAP_ARM_INJECT_SERROR_ESR); +} + bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, int *fdarray, struct kvm_vcpu_init *init) @@ -310,7 +317,7 @@ static int compare_u64(const void *a, const void *b) return 0; } -/* Initialize the CPUState's cpreg list according to the kernel's +/* Initialize the ARMCPU cpreg list according to the kernel's * definition of what CPU registers it knows about (and throw away * the previous TCG-created cpreg list). */ @@ -522,6 +529,59 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) return 0; } +int kvm_put_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + events.exception.serror_pending = env->serror.pending; + + /* Inject SError to guest with specified syndrome if host kernel + * supports it, otherwise inject SError without syndrome. 
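+     * (events was zeroed above, so when the capability is absent the
+     * serror_has_esr/serror_esr fields stay 0 and the SError is
+     * injected without a specified syndrome.)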
+ */ + if (cap_has_inject_serror_esr) { + events.exception.serror_has_esr = env->serror.has_esr; + events.exception.serror_esr = env->serror.esr; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to put vcpu events"); + } + + return ret; +} + +int kvm_get_vcpu_events(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + if (!kvm_has_vcpu_events()) { + return 0; + } + + memset(&events, 0, sizeof(events)); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); + if (ret) { + error_report("failed to get vcpu events"); + return ret; + } + + env->serror.pending = events.exception.serror_pending; + env->serror.has_esr = events.exception.serror_has_esr; + env->serror.esr = events.exception.serror_esr; + + return 0; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 4e91c11796..0f1e94c7b5 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -217,6 +217,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK; + /* Check whether userspace can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -358,6 +361,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + /* Note that we do not call write_cpustate_to_list() * here, so we are only writing the tuple list back to * KVM. This is safe because nothing can change the @@ -445,6 +453,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpscr(env, fpscr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e0b8246283..5de8ff0ac5 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -546,6 +546,9 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_arm_init_debug(cs); + /* Check whether user space can specify guest syndrome value */ + kvm_arm_init_serror_injection(cs); + return kvm_arm_init_cpreg_list(cpu); } @@ -727,6 +730,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } @@ -863,6 +871,11 @@ int kvm_arch_get_registers(CPUState *cs) } vfp_set_fpcr(env, fpr); + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } @@ -920,7 +933,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) { - int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT; + int hsr_ec = syn_get_ec(debug_exit->hsr); ARMCPU *cpu = ARM_CPU(cs); CPUClass *cc = CPU_GET_CLASS(cs); CPUARMState *env = &cpu->env; diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 863f205822..21c0129da2 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -50,9 +50,9 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, /** * kvm_arm_init_cpreg_list: - * @cs: CPUState + * @cpu: ARMCPU * - * Initialize the CPUState's cpreg list according to the kernel's + * Initialize the ARMCPU cpreg list according to the kernel's * definition of what CPU registers it knows about (and throw away * the previous TCG-created cpreg list). 
* @@ -121,6 +121,30 @@ bool write_kvmstate_to_list(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); +/** + * kvm_arm_init_serror_injection: + * @cs: CPUState + * + * Check whether KVM can set guest SError syndrome. + */ +void kvm_arm_init_serror_injection(CPUState *cs); + +/** + * kvm_get_vcpu_events: + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. + */ +int kvm_get_vcpu_events(ARMCPU *cpu); + +/** + * kvm_put_vcpu_events: + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. + */ +int kvm_put_vcpu_events(ARMCPU *cpu); + #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: diff --git a/target/arm/machine.c b/target/arm/machine.c index ff4ec22bf7..239fe4e84d 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -131,9 +131,8 @@ static const VMStateDescription vmstate_iwmmxt = { static bool sve_needed(void *opaque) { ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - return arm_feature(env, ARM_FEATURE_SVE); + return cpu_isar_feature(aa64_sve, cpu); } /* The first two words of each Zreg is stored in VFP state. */ @@ -172,6 +171,27 @@ static const VMStateDescription vmstate_sve = { }; #endif /* AARCH64 */ +static bool serror_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return env->serror.pending != 0; +} + +static const VMStateDescription vmstate_serror = { + .name = "cpu/serror", + .version_id = 1, + .minimum_version_id = 1, + .needed = serror_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.serror.pending, ARMCPU), + VMSTATE_UINT8(env.serror.has_esr, ARMCPU), + VMSTATE_UINT64(env.serror.esr, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -726,6 +746,7 @@ const VMStateDescription vmstate_arm_cpu = { #ifdef TARGET_AARCH64 &vmstate_sve, #endif + &vmstate_serror, NULL } }; diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 952b8d122b..90741f6331 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -28,8 +28,8 @@ #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) -static void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) +void raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -42,7 +42,7 @@ static void raise_exception(CPUARMState *env, uint32_t excp, * (see DDI0478C.a D1.10.4) */ target_el = 2; - if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) { + if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) { syndrome = syn_uncategorized(); } } @@ -238,6 +238,25 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, #endif /* !defined(CONFIG_USER_ONLY) */ +void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue) +{ + /* + * Perform the v8M stack limit check for SP updates from translated code, + * raising an exception if the limit is breached. + */ + if (newvalue < v7m_sp_limit(env)) { + CPUState *cs = CPU(arm_env_get_cpu(env)); + + /* + * Stack limit exceptions are a rare case, so rather than syncing + * PC/condbits before the call, we use cpu_restore_state() to + * get them right before raising the exception. 
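+         * The syndrome is 0 and the nominal target EL is 1; for
+         * M-profile, arm_v7m_cpu_do_interrupt() routes EXCP_STKOF to a
+         * UsageFault with CFSR.STKOF set.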
+         */
+        cpu_restore_state(cs, GETPC(), true);
+        raise_exception(env, EXCP_STKOF, 0, 1);
+    }
+}
+
 uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
@@ -1082,6 +1101,11 @@ void HELPER(exception_return)(CPUARMState *env)
                       "AArch64 EL%d PC 0x%" PRIx64 "\n",
                       cur_el, new_el, env->pc);
     }
+    /*
+     * Note that cur_el can never be 0. If new_el is 0, then
+     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
+     */
+    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
 
     qemu_mutex_lock_iothread();
     arm_call_el_change_hook(arm_env_get_cpu(env));
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 0f98097253..8cbc6516ab 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "internals.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -1688,6 +1689,47 @@ static void swap_memmove(void *vd, void *vs, size_t n)
     }
 }
 
+/* Similarly for memset of 0. */
+static void swap_memzero(void *vd, size_t n)
+{
+    uintptr_t d = (uintptr_t)vd;
+    uintptr_t o = (d | n) & 7;
+    size_t i;
+
+    /* Usually, the first bit of a predicate is set, so N is 0. */
+    if (likely(n == 0)) {
+        return;
+    }
+
+#ifndef HOST_WORDS_BIGENDIAN
+    o = 0;
+#endif
+    switch (o) {
+    case 0:
+        memset(vd, 0, n);
+        break;
+
+    case 4:
+        for (i = 0; i < n; i += 4) {
+            *(uint32_t *)H1_4(d + i) = 0;
+        }
+        break;
+
+    case 2:
+    case 6:
+        for (i = 0; i < n; i += 2) {
+            *(uint16_t *)H1_2(d + i) = 0;
+        }
+        break;
+
+    default:
+        for (i = 0; i < n; i++) {
+            *(uint8_t *)H1(d + i) = 0;
+        }
+        break;
+    }
+}
+
 void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
 {
     intptr_t opr_sz = simd_oprsz(desc);
@@ -3927,161 +3969,471 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
 /*
  * Load contiguous data, protected by a governing predicate.
  */
-#define DO_LD1(NAME, FN, TYPEE, TYPEM, H)                  \
-static void do_##NAME(CPUARMState *env, void *vd, void *vg, \
-                      target_ulong addr, intptr_t oprsz,   \
-                      uintptr_t ra)                        \
-{                                                          \
-    intptr_t i = 0;                                        \
-    do {                                                   \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m = 0;                                   \
-            if (pg & 1) {                                  \
-                m = FN(env, addr, ra);                     \
-            }                                              \
-            *(TYPEE *)(vd + H(i)) = m;                     \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += sizeof(TYPEM);                         \
-        } while (i & 15);                                  \
-    } while (i < oprsz);                                   \
-}                                                          \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    do_##NAME(env, &env->vfp.zregs[simd_data(desc)], vg,   \
-              addr, simd_oprsz(desc), GETPC());            \
+
+/*
+ * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
+ * Memory is valid through @host + @mem_max. The register element
+ * indices are inferred from @mem_ofs, as modified by the types for
+ * which the helper is built. Return the @mem_ofs of the first element
+ * not loaded (which is @mem_max if they are all loaded).
+ *
+ * For softmmu, we have fully validated the guest page. For user-only,
+ * we cannot fully validate without taking the mmap lock, but since we
+ * know the access is within one host page, if any access is valid they
+ * all must be valid. However, when @vg is all false, it may be that
+ * no access is valid.
+ */
+typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
+                                 intptr_t mem_ofs, intptr_t mem_max);
+
+/*
+ * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
+ * The controlling predicate is known to be true. 
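+ * The TCGMemOpIdx argument packs the memop and mmu_idx recovered from
+ * the helper descriptor, so the softmmu variants can hand it straight
+ * to the helper_*_mmu accessors.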
+ */ +typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); +typedef sve_ld1_tlb_fn sve_st1_tlb_fn; + +/* + * Generate the above primitives. + */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ + intptr_t mem_off, const intptr_t mem_max) \ +{ \ + intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ + uint64_t *pg = vg; \ + while (mem_off + sizeof(TYPEM) <= mem_max) { \ + TYPEM val = 0; \ + if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ + val = HOST(host + mem_off); \ + } \ + *(TYPEE *)(vd + H(reg_off)) = val; \ + mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ + } \ + return mem_off; \ +} + +#ifdef CONFIG_SOFTMMU +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TYPEM val = TLB(env, addr, oi, ra); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ } +#else +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TYPEM val = HOST(g2h(addr)); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} +#endif -#define DO_LD2(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 2 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ + DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ + DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ + MOEND, helper_##end##_##PT##_mmu) + +DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) + +DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) + +DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) + +DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, 
lduw, lduw) +DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) +DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) + +DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, ldul) +DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) + +DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) + +#undef DO_LD_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_LD_PRIM_2 + +/* + * Skip through a sequence of inactive elements in the guarding predicate @vg, + * beginning at @reg_off bounded by @reg_max. Return the offset of the active + * element >= @reg_off, or @reg_max if there were no active elements at all. + */ +static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, + intptr_t reg_max, int esz) +{ + uint64_t pg_mask = pred_esz_masks[esz]; + uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63); + + /* In normal usage, the first element is active. */ + if (likely(pg & 1)) { + return reg_off; + } + + if (pg == 0) { + reg_off &= -64; + do { + reg_off += 64; + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false. */ + return reg_max; + } + pg = vg[reg_off >> 6] & pg_mask; + } while (pg == 0); + } + reg_off += ctz64(pg); + + /* We should never see an out of range predicate bit set. */ + tcg_debug_assert(reg_off < reg_max); + return reg_off; } -#define DO_LD3(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0, m3 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - *(TYPEE *)(d3 + H(i)) = m3; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 3 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +/* + * Return the maximum offset <= @mem_max which is still within the page + * referenced by @base + @mem_off. 
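+ *
+ * Worked example, assuming 4KiB pages: if @base + @mem_off sits at
+ * page offset 0xff8 and mem_max - mem_off == 64, only 8 bytes remain
+ * before the page boundary, so the result is mem_off + 8.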
+ */ +static intptr_t max_for_page(target_ulong base, intptr_t mem_off, + intptr_t mem_max) +{ + target_ulong addr = base + mem_off; + intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); + return MIN(split, mem_max - mem_off) + mem_off; } -#define DO_LD4(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m1 = 0, m2 = 0, m3 = 0, m4 = 0; \ - if (pg & 1) { \ - m1 = FN(env, addr, ra); \ - m2 = FN(env, addr + sizeof(TYPEM), ra); \ - m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \ - m4 = FN(env, addr + 3 * sizeof(TYPEM), ra); \ - } \ - *(TYPEE *)(d1 + H(i)) = m1; \ - *(TYPEE *)(d2 + H(i)) = m2; \ - *(TYPEE *)(d3 + H(i)) = m3; \ - *(TYPEE *)(d4 + H(i)) = m4; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 4 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +static inline void set_helper_retaddr(uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + helper_retaddr = ra; +#endif } -DO_LD1(sve_ld1bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2) -DO_LD1(sve_ld1bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2) -DO_LD1(sve_ld1bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4) -DO_LD1(sve_ld1bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4) -DO_LD1(sve_ld1bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, ) -DO_LD1(sve_ld1bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, ) +/* + * The result of tlb_vaddr_to_host for user-only is just g2h(x), + * which is always non-null. Elide the useless test. + */ +static inline bool test_host_page(void *host) +{ +#ifdef CONFIG_USER_ONLY + return true; +#else + return likely(host != NULL); +#endif +} -DO_LD1(sve_ld1hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4) -DO_LD1(sve_ld1hss_r, cpu_ldsw_data_ra, uint32_t, int16_t, H1_4) -DO_LD1(sve_ld1hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, ) -DO_LD1(sve_ld1hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, ) +/* + * Common helper for all contiguous one-register predicated loads. + */ +static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + ARMVectorReg scratch; + void *host; + intptr_t split, reg_off, mem_off; + + /* Find the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. 
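+     * (Note that host_fn is handed host - mem_off, so it can index
+     * memory with the same mem_off it uses for the register offset.)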
+ */ + split = max_for_page(addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + set_helper_retaddr(0); + /* After having taken any fault, zero leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } -DO_LD1(sve_ld1sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, ) -DO_LD1(sve_ld1sds_r, cpu_ldl_data_ra, uint64_t, int32_t, ) + /* + * Perform the predicated read into a temporary, thus ensuring + * if the load of the last element faults, Vd is not modified. + */ +#ifdef CONFIG_USER_ONLY + swap_memzero(&scratch, reg_off); + host_fn(&scratch, vg, g2h(addr), mem_off, mem_max); +#else + memset(&scratch, 0, reg_max); + goto start; + while (1) { + reg_off = find_next_active(vg, reg_off, reg_max, esz); + if (reg_off >= reg_max) { + break; + } + mem_off = reg_off >> diffsz; + split = max_for_page(addr, mem_off, mem_max); + + start: + if (split - mem_off >= (1 << msz)) { + /* At least one whole element on this page. */ + host = tlb_vaddr_to_host(env, addr + mem_off, + MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(&scratch, vg, host - mem_off, + mem_off, split); + reg_off = mem_off << diffsz; + continue; + } + } -DO_LD1(sve_ld1bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD2(sve_ld2bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD3(sve_ld3bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LD4(sve_ld4bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) + /* + * Perform one normal read. This may fault, longjmping out to the + * main loop in order to raise an exception. It may succeed, and + * as a side-effect load the TLB entry for the next round. Finally, + * in the extremely unlikely case we're performing this operation + * on I/O memory, it may succeed but not bring in the TLB entry. + * But even then we have still made forward progress. 
+ */ + tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); + reg_off += 1 << esz; + } +#endif -DO_LD1(sve_ld1hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD2(sve_ld2hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD3(sve_ld3hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LD4(sve_ld4hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) + set_helper_retaddr(0); + memcpy(vd, &scratch, reg_max); +} -DO_LD1(sve_ld1ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD2(sve_ld2ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD3(sve_ld3ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LD4(sve_ld4ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_1(ld1bb, 0) +DO_LD1_1(ld1bhu, 1) +DO_LD1_1(ld1bhs, 1) +DO_LD1_1(ld1bsu, 2) +DO_LD1_1(ld1bss, 2) +DO_LD1_1(ld1bdu, 3) +DO_LD1_1(ld1bds, 3) -DO_LD1(sve_ld1dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD2(sve_ld2dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD3(sve_ld3dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) -DO_LD4(sve_ld4dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) +DO_LD1_2(ld1hh, 1, 1) +DO_LD1_2(ld1hsu, 2, 1) +DO_LD1_2(ld1hss, 2, 1) +DO_LD1_2(ld1hdu, 3, 1) +DO_LD1_2(ld1hds, 3, 1) -#undef DO_LD1 -#undef DO_LD2 -#undef DO_LD3 -#undef DO_LD4 +DO_LD1_2(ld1ss, 2, 2) +DO_LD1_2(ld1sdu, 3, 2) +DO_LD1_2(ld1sds, 3, 2) + +DO_LD1_2(ld1dd, 3, 3) + +#undef DO_LD1_1 +#undef DO_LD1_2 /* - * Load contiguous data, first-fault and no-fault. + * Common helpers for all contiguous 2,3,4-register predicated loads. */ +static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[2] = { }; -#ifdef CONFIG_USER_ONLY + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + } + i += size, pg >>= size; + addr += 2 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. 
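+     * The loads above targeted the local scratch registers; a faulting
+     * element longjmps out before the memcpys below, leaving the
+     * architectural Zregs unmodified.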
*/ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); +} + +static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[3] = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + } + i += size, pg >>= size; + addr += 3 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); +} + +static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[4] = { }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + } + i += size, pg >>= size; + addr += 4 * size; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); +} + +#define DO_LDN_1(N) \ +void __attribute__((flatten)) HELPER(sve_ld##N##bb_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +} + +#define DO_LDN_2(N, SUFF, SIZE) \ +void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_le_tlb); \ +} \ +void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_be_tlb); \ +} + +DO_LDN_1(2) +DO_LDN_1(3) +DO_LDN_1(4) + +DO_LDN_2(2, hh, 2) +DO_LDN_2(3, hh, 2) +DO_LDN_2(4, hh, 2) + +DO_LDN_2(2, ss, 4) +DO_LDN_2(3, ss, 4) +DO_LDN_2(4, ss, 4) + +DO_LDN_2(2, dd, 8) +DO_LDN_2(3, dd, 8) +DO_LDN_2(4, dd, 8) + +#undef DO_LDN_1 +#undef DO_LDN_2 + +/* + * Load contiguous data, first-fault and no-fault. + * + * For user-only, one could argue that we should hold the mmap_lock during + * the operation so that there is no race between page_check_range and the + * load operation. However, unmapping pages out from under a running thread + * is extraordinarily unlikely. 
This theoretical race condition also affects + * linux-user/ in its get_user/put_user macros. + * + * TODO: Construct some helpers, written in assembly, that interact with + * handle_cpu_signal to produce memory ops which can properly report errors + * without racing. + */ /* Fault on byte I. All bits in FFR from I are cleared. The vector * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE @@ -4100,573 +4452,932 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } } -/* Hold the mmap lock during the operation so that there is no race - * between page_check_range and the load operation. We expect the - * usual case to have no faults at all, so we check the whole range - * first and if successful defer to the normal load operation. - * - * TODO: Change mmap_lock to a rwlock so that multiple readers - * can run simultaneously. This will probably help other uses - * within QEMU as well. +/* + * Common helper for all contiguous first-fault loads. */ -#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \ -static void do_sve_ldff1##PART(CPUARMState *env, void *vd, void *vg, \ - target_ulong addr, intptr_t oprsz, \ - bool first, uintptr_t ra) \ -{ \ - intptr_t i = 0; \ - do { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m = 0; \ - if (pg & 1) { \ - if (!first && \ - unlikely(page_check_range(addr, sizeof(TYPEM), \ - PAGE_READ))) { \ - record_fault(env, i, oprsz); \ - return; \ - } \ - m = FN(env, addr, ra); \ - first = false; \ - } \ - *(TYPEE *)(vd + H(i)) = m; \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += sizeof(TYPEM); \ - } while (i & 15); \ - } while (i < oprsz); \ -} \ -void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - mmap_lock(); \ - if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \ - do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \ - } else { \ - do_sve_ldff1##PART(env, vd, vg, addr, oprsz, true, GETPC()); \ - } \ - mmap_unlock(); \ -} +static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + intptr_t split, reg_off, mem_off; + void *host; + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. 
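+     * For a first-fault load this also gives the architecturally
+     * required behaviour: the first active element is permitted to
+     * fault normally, so no FFR update is needed on this path.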
+ */ + split = max_for_page(addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + set_helper_retaddr(0); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } -/* No-fault loads are like first-fault loads without the - * first faulting special case. - */ -#define DO_LDNF1(PART) \ -void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - mmap_lock(); \ - if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) { \ - do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC()); \ - } else { \ - do_sve_ldff1##PART(env, vd, vg, addr, oprsz, false, GETPC()); \ - } \ - mmap_unlock(); \ -} +#ifdef CONFIG_USER_ONLY + /* + * The page(s) containing this first element at ADDR+MEM_OFF must + * be valid. Considering that this first element may be misaligned + * and cross a page boundary itself, take the rest of the page from + * the last byte of the element. + */ + split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); + mem_off = host_fn(vd, vg, g2h(addr), mem_off, split); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + reg_off = mem_off << diffsz; #else + /* + * Perform one normal read, which will fault or not. + * But it is likely to bring the page into the tlb. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); + + /* After any fault, zero any leading predicated false elts. */ + swap_memzero(vd, reg_off); + mem_off += 1 << msz; + reg_off += 1 << esz; + + /* Try again to read the balance of the page. */ + split = max_for_page(addr, mem_off - 1, mem_max); + if (split >= (1 << msz)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + } +#endif -/* TODO: System mode is not yet supported. - * This would probably use tlb_vaddr_to_host. - */ -#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H) \ -void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - g_assert_not_reached(); \ + set_helper_retaddr(0); + record_fault(env, reg_off, reg_max); } -#define DO_LDNF1(PART) \ -void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - g_assert_not_reached(); \ -} +/* + * Common helper for all contiguous no-fault loads. + */ +static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const int esz, const int msz, + sve_ld1_host_fn *host_fn) +{ + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t split, reg_off, mem_off; + void *host; +#ifdef CONFIG_USER_ONLY + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); + if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) { + /* The entire operation is valid and will not fault. 
*/ + host_fn(vd, vg, host, 0, mem_max); + return; + } #endif -DO_LDFF1(bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1) -DO_LDFF1(bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2) -DO_LDFF1(bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2) -DO_LDFF1(bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4) -DO_LDFF1(bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4) -DO_LDFF1(bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, ) -DO_LDFF1(bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, ) + /* There will be no fault, so we may modify in advance. */ + memset(vd, 0, reg_max); -DO_LDFF1(hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2) -DO_LDFF1(hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4) -DO_LDFF1(hss_r, cpu_ldsw_data_ra, uint32_t, int8_t, H1_4) -DO_LDFF1(hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, ) -DO_LDFF1(hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, ) + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + return; + } + mem_off = reg_off >> diffsz; -DO_LDFF1(ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4) -DO_LDFF1(sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, ) -DO_LDFF1(sds_r, cpu_ldl_data_ra, uint64_t, int32_t, ) +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) { + /* At least one load is valid; take the rest of the page. */ + split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max); + mem_off = host_fn(vd, vg, host, mem_off, split); + reg_off = mem_off << diffsz; + } +#else + /* + * If the address is not in the TLB, we have no way to bring the + * entry into the TLB without also risking a fault. Note that + * the corollary is that we never load from an address not in RAM. + * + * This last is out of spec, in a weird corner case. + * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory + * must not actually hit the bus -- it returns UNKNOWN data instead. + * But if you map non-RAM with Normal memory attributes and do a NF + * load then it should access the bus. (Nobody ought actually do this + * in the real world, obviously.) + * + * Then there are the annoying special cases with watchpoints... + * + * TODO: Add a form of tlb_fill that does not raise an exception, + * with a form of tlb_vaddr_to_host and a set of loads to match. + * The non_fault_vaddr_to_host would handle everything, usually, + * and the loads would handle the iomem path for watchpoints. 
+ */ + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + split = max_for_page(addr, mem_off, mem_max); + if (host && split >= (1 << msz)) { + mem_off = host_fn(vd, vg, host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } +#endif -DO_LDFF1(dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, ) + record_fault(env, reg_off, reg_max); +} -#undef DO_LDFF1 +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ +} -DO_LDNF1(bb_r) -DO_LDNF1(bhu_r) -DO_LDNF1(bhs_r) -DO_LDNF1(bsu_r) -DO_LDNF1(bss_r) -DO_LDNF1(bdu_r) -DO_LDNF1(bds_r) +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ +} \ +void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ +} + +DO_LDFF1_LDNF1_1(bb, 0) +DO_LDFF1_LDNF1_1(bhu, 1) +DO_LDFF1_LDNF1_1(bhs, 1) +DO_LDFF1_LDNF1_1(bsu, 2) +DO_LDFF1_LDNF1_1(bss, 2) +DO_LDFF1_LDNF1_1(bdu, 3) +DO_LDFF1_LDNF1_1(bds, 3) -DO_LDNF1(hh_r) -DO_LDNF1(hsu_r) -DO_LDNF1(hss_r) -DO_LDNF1(hdu_r) -DO_LDNF1(hds_r) +DO_LDFF1_LDNF1_2(hh, 1, 1) +DO_LDFF1_LDNF1_2(hsu, 2, 1) +DO_LDFF1_LDNF1_2(hss, 2, 1) +DO_LDFF1_LDNF1_2(hdu, 3, 1) +DO_LDFF1_LDNF1_2(hds, 3, 1) -DO_LDNF1(ss_r) -DO_LDNF1(sdu_r) -DO_LDNF1(sds_r) +DO_LDFF1_LDNF1_2(ss, 2, 2) +DO_LDFF1_LDNF1_2(sdu, 3, 2) +DO_LDFF1_LDNF1_2(sds, 3, 2) -DO_LDNF1(dd_r) +DO_LDFF1_LDNF1_2(dd, 3, 3) -#undef DO_LDNF1 +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 /* * Store contiguous data, protected by a governing predicate. 
*/ -#define DO_ST1(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *vd = &env->vfp.zregs[rd]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m = *(TYPEE *)(vd + H(i)); \ - FN(env, addr, m, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} -#define DO_ST1_D(NAME, FN, TYPEM) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc) / 8; \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - uint64_t *d = &env->vfp.zregs[rd].d[0]; \ - uint8_t *pg = vg; \ - for (i = 0; i < oprsz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - FN(env, addr, d[i], ra); \ - } \ - addr += sizeof(TYPEM); \ - } \ +#ifdef CONFIG_SOFTMMU +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \ } - -#define DO_ST2(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 2 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ +#else +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \ } +#endif -#define DO_ST3(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = &env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - TYPEM m3 = *(TYPEE *)(d3 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 3 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} +DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) -#define DO_ST4(NAME, FN, TYPEE, TYPEM, H) \ -void HELPER(NAME)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - intptr_t ra = GETPC(); \ - unsigned rd = simd_data(desc); \ - void *d1 = 
&env->vfp.zregs[rd]; \ - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; \ - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; \ - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEM m1 = *(TYPEE *)(d1 + H(i)); \ - TYPEM m2 = *(TYPEE *)(d2 + H(i)); \ - TYPEM m3 = *(TYPEE *)(d3 + H(i)); \ - TYPEM m4 = *(TYPEE *)(d4 + H(i)); \ - FN(env, addr, m1, ra); \ - FN(env, addr + sizeof(TYPEM), m2, ra); \ - FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \ - FN(env, addr + 3 * sizeof(TYPEM), m4, ra); \ - } \ - i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \ - addr += 4 * sizeof(TYPEM); \ - } while (i & 15); \ - } \ -} +DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2) -DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4) -DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t) +DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) +DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4) -DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t) +DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) -DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t) +DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) -DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1) +DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) -DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2) +DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) -DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) -DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4) +#undef DO_ST_TLB -DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t) +/* + * Common helpers for all contiguous 1,2,3,4-register predicated stores. 
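+ *
+ * Here esize and msize are the register element size and memory
+ * element size in bytes; e.g. DO_STN_1(1, bh, 2) builds sve_st1bh_r
+ * with esize == 2 (16-bit register elements) and msize == 1
+ * (byte stores).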
+/*
+ * Common helpers for all contiguous 1,2,3,4-register predicated stores.
+ */
+static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *vd = &env->vfp.zregs[rd];
-void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, vd, i, addr, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += msize;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+}
+
+static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-        }
-        addr += 2 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 2 * msize;
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }

-void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+    void *d3 = &env->vfp.zregs[(rd + 2) & 31];
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-            cpu_stq_data_ra(env, addr + 16, d3[i], ra);
-        }
-        addr += 3 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 3 * msize;
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }

-void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
-    uint64_t *d4 = &env->vfp.zregs[(rd + 3) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+    void *d3 = &env->vfp.zregs[(rd + 2) & 31];
+    void *d4 = &env->vfp.zregs[(rd + 3) & 31];
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-            cpu_stq_data_ra(env, addr + 16, d3[i], ra);
-            cpu_stq_data_ra(env, addr + 24, d4[i], ra);
-        }
-        addr += 4 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+                tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 4 * msize;
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }

-/* Loads with a vector index. */
+#define DO_STN_1(N, NAME, ESIZE) \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_r) \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+    sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \
+                  sve_st1##NAME##_tlb); \
+}
-#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
-{ \
-    intptr_t i, oprsz = simd_oprsz(desc); \
-    unsigned scale = simd_data(desc); \
-    uintptr_t ra = GETPC(); \
-    for (i = 0; i < oprsz; ) { \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
-        do { \
-            TYPEM m = 0; \
-            if (pg & 1) { \
-                target_ulong off = *(TYPEI *)(vm + H1_4(i)); \
-                m = FN(env, base + (off << scale), ra); \
-            } \
-            *(uint32_t *)(vd + H1_4(i)) = m; \
-            i += 4, pg >>= 4; \
-        } while (i & 15); \
-    } \
+#define DO_STN_2(N, NAME, ESIZE, MSIZE) \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_le_r) \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+    sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
+                  sve_st1##NAME##_le_tlb); \
+} \
+void __attribute__((flatten)) HELPER(sve_st##N##NAME##_be_r) \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
+{ \
+    sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
+                  sve_st1##NAME##_be_tlb); \
+}
+
+DO_STN_1(1, bb, 1)
+DO_STN_1(1, bh, 2)
+DO_STN_1(1, bs, 4)
+DO_STN_1(1, bd, 8)
+DO_STN_1(2, bb, 1)
+DO_STN_1(3, bb, 1)
+DO_STN_1(4, bb, 1)
+
+DO_STN_2(1, hh, 2, 2)
+DO_STN_2(1, hs, 4, 2)
+DO_STN_2(1, hd, 8, 2)
+DO_STN_2(2, hh, 2, 2)
+DO_STN_2(3, hh, 2, 2)
+DO_STN_2(4, hh, 2, 2)
+
+DO_STN_2(1, ss, 4, 4)
+DO_STN_2(1, sd, 8, 4)
+DO_STN_2(2, ss, 4, 4)
+DO_STN_2(3, ss, 4, 4)
+DO_STN_2(4, ss, 4, 4)
+
+DO_STN_2(1, dd, 8, 8)
+DO_STN_2(2, dd, 8, 8)
+DO_STN_2(3, dd, 8, 8)
+DO_STN_2(4, dd, 8, 8)
+
+#undef DO_STN_1
+#undef DO_STN_2
+
+/*
+ * Loads with a vector index.
+ */
+
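The new helpers above no longer take their operands from simd_data(); instead the 32-bit desc word packs a TCGMemOpIdx and the vector register number side by side, unpacked with extract32(). A sketch of that bit-packing scheme; the shift and width values here are illustrative, not the real layout:

    #include <stdint.h>
    #include <assert.h>

    #define SIMD_DATA_SHIFT 10   /* assumed start of the data field */
    #define MEMOPIDX_SHIFT  10   /* assumed width of the memop+mmu_idx field */

    static inline uint32_t extract_bits(uint32_t v, int start, int len)
    {
        return (v >> start) & ((1u << len) - 1);   /* same as extract32() */
    }

    int main(void)
    {
        uint32_t oi = 0x1a5;     /* some packed memop+mmu_idx value */
        uint32_t rd = 17;        /* vector register number, 5 bits */
        uint32_t desc = (oi << SIMD_DATA_SHIFT)
                      | (rd << (SIMD_DATA_SHIFT + MEMOPIDX_SHIFT));

        assert(extract_bits(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT) == oi);
        assert(extract_bits(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5) == rd);
        return 0;
    }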
+/*
+ * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
+ */
+typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
+
+static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
+{
+    return *(uint32_t *)(reg + H1_4(reg_ofs));
 }

-#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
-{ \
-    intptr_t i, oprsz = simd_oprsz(desc) / 8; \
-    unsigned scale = simd_data(desc); \
-    uintptr_t ra = GETPC(); \
-    uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \
-    for (i = 0; i < oprsz; i++) { \
-        TYPEM mm = 0; \
-        if (pg[H1(i)] & 1) { \
-            target_ulong off = (TYPEI)m[i]; \
-            mm = FN(env, base + (off << scale), ra); \
-        } \
-        d[i] = mm; \
-    } \
+static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
+{
+    return *(int32_t *)(reg + H1_4(reg_ofs));
 }

-DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t, cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t, cpu_ldl_data_ra)
+static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
+{
+    return (uint32_t)*(uint64_t *)(reg + reg_ofs);
+}

-/* First fault loads with a vector index. */
+static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
+{
+    return (int32_t)*(uint64_t *)(reg + reg_ofs);
+}

-#ifdef CONFIG_USER_ONLY
+static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
+{
+    return *(uint64_t *)(reg + reg_ofs);
+}

-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
-{ \
-    intptr_t i, oprsz = simd_oprsz(desc); \
-    unsigned scale = simd_data(desc); \
-    uintptr_t ra = GETPC(); \
-    bool first = true; \
-    mmap_lock(); \
-    for (i = 0; i < oprsz; ) { \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
-        do { \
-            TYPEM m = 0; \
-            if (pg & 1) { \
-                target_ulong off = *(TYPEI *)(vm + H(i)); \
-                target_ulong addr = base + (off << scale); \
-                if (!first && \
-                    page_check_range(addr, sizeof(TYPEM), PAGE_READ)) { \
-                    record_fault(env, i, oprsz); \
-                    goto exit; \
-                } \
-                m = FN(env, addr, ra); \
-                first = false; \
-            } \
-            *(TYPEE *)(vd + H(i)) = m; \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE); \
-        } while (i & 15); \
-    } \
- exit: \
-    mmap_unlock(); \
+static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    ARMVectorReg scratch = { };
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (likely(pg & 1)) {
+                target_ulong off = off_fn(vm, i);
+                tlb_fn(env, &scratch, i, base + (off << scale), oi, ra);
+            }
+            i += 4, pg >>= 4;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back. */
+    memcpy(vd, &scratch, oprsz);
 }

+static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc) / 8;
+    ARMVectorReg scratch = { };
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; i++) {
+        uint8_t pg = *(uint8_t *)(vg + H1(i));
+        if (likely(pg & 1)) {
+            target_ulong off = off_fn(vm, i * 8);
+            tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
+        }
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back. */
+    memcpy(vd, &scratch, oprsz * 8);
+}
+
+#define DO_LD1_ZPZ_S(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
+{ \
+    sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+               off_##OFS##_s, sve_ld1##MEM##_tlb); \
+}
+
+#define DO_LD1_ZPZ_D(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
+{ \
+    sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+               off_##OFS##_d, sve_ld1##MEM##_tlb); \
+}
+
+DO_LD1_ZPZ_S(bsu, zsu)
+DO_LD1_ZPZ_S(bsu, zss)
+DO_LD1_ZPZ_D(bdu, zsu)
+DO_LD1_ZPZ_D(bdu, zss)
+DO_LD1_ZPZ_D(bdu, zd)
+
+DO_LD1_ZPZ_S(bss, zsu)
+DO_LD1_ZPZ_S(bss, zss)
+DO_LD1_ZPZ_D(bds, zsu)
+DO_LD1_ZPZ_D(bds, zss)
+DO_LD1_ZPZ_D(bds, zd)
+
+DO_LD1_ZPZ_S(hsu_le, zsu)
+DO_LD1_ZPZ_S(hsu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zsu)
+DO_LD1_ZPZ_D(hdu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zd)
+
+DO_LD1_ZPZ_S(hsu_be, zsu)
+DO_LD1_ZPZ_S(hsu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zsu)
+DO_LD1_ZPZ_D(hdu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zd)
+
+DO_LD1_ZPZ_S(hss_le, zsu)
+DO_LD1_ZPZ_S(hss_le, zss)
+DO_LD1_ZPZ_D(hds_le, zsu)
+DO_LD1_ZPZ_D(hds_le, zss)
+DO_LD1_ZPZ_D(hds_le, zd)
+
+DO_LD1_ZPZ_S(hss_be, zsu)
+DO_LD1_ZPZ_S(hss_be, zss)
+DO_LD1_ZPZ_D(hds_be, zsu)
+DO_LD1_ZPZ_D(hds_be, zss)
+DO_LD1_ZPZ_D(hds_be, zd)
+
+DO_LD1_ZPZ_S(ss_le, zsu)
+DO_LD1_ZPZ_S(ss_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zsu)
+DO_LD1_ZPZ_D(sdu_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zd)
+
+DO_LD1_ZPZ_S(ss_be, zsu)
+DO_LD1_ZPZ_S(ss_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zsu)
+DO_LD1_ZPZ_D(sdu_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zd)
+
+DO_LD1_ZPZ_D(sds_le, zsu)
+DO_LD1_ZPZ_D(sds_le, zss)
+DO_LD1_ZPZ_D(sds_le, zd)
+
+DO_LD1_ZPZ_D(sds_be, zsu)
+DO_LD1_ZPZ_D(sds_be, zss)
+DO_LD1_ZPZ_D(sds_be, zd)
+
+DO_LD1_ZPZ_D(dd_le, zsu)
+DO_LD1_ZPZ_D(dd_le, zss)
+DO_LD1_ZPZ_D(dd_le, zd)
+
+DO_LD1_ZPZ_D(dd_be, zsu)
+DO_LD1_ZPZ_D(dd_be, zss)
+DO_LD1_ZPZ_D(dd_be, zd)
+
+#undef DO_LD1_ZPZ_S
+#undef DO_LD1_ZPZ_D
+
+/* First fault loads with a vector index. */
+
+/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting.
+ * The controlling predicate is known to be true. Return true if the
+ * load was successful.
+ */
+typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+                           target_ulong vaddr, int mmu_idx);
+
+#ifdef CONFIG_SOFTMMU
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+                              target_ulong addr, int mmu_idx) \
+{ \
+    target_ulong next_page = -(addr | TARGET_PAGE_MASK); \
+    if (likely(next_page - addr >= sizeof(TYPEM))) { \
+        void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \
+        if (likely(host)) { \
+            TYPEM val = HOST(host); \
+            *(TYPEE *)(vd + H(reg_off)) = val; \
+            return true; \
+        } \
+    } \
+    return false; \
+}
 #else
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+                              target_ulong addr, int mmu_idx) \
+{ \
+    if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ))) { \
+        TYPEM val = HOST(g2h(addr)); \
+        *(TYPEE *)(vd + H(reg_off)) = val; \
+        return true; \
+    } \
+    return false; \
+}
+#endif

-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
-{ \
-    g_assert_not_reached(); \
+DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p)
+DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p)
+DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p)
+DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p)
+
+DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p)
+DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p)
+DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p)
+DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p)
+DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p)
+DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p)
+DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p)
+DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p)
+
+DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p)
+DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p)
+DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p)
+DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p)
+DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p)
+DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p)
+
+DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p)
+DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p)
+
+/*
+ * Common helper for all gather first-faulting loads.
+ */
+static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                                target_ulong base, uint32_t desc, uintptr_t ra,
+                                zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+                                sve_ld1_nf_fn *nonfault_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t reg_off, reg_max = simd_oprsz(desc);
+    target_ulong addr;
+
+    /* Skip to the first true predicate. */
+    reg_off = find_next_active(vg, 0, reg_max, MO_32);
+    if (likely(reg_off < reg_max)) {
+        /* Perform one normal read, which will fault or not. */
+        set_helper_retaddr(ra);
+        addr = off_fn(vm, reg_off);
+        addr = base + (addr << scale);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+        /* The rest of the reads will be non-faulting. */
+        set_helper_retaddr(0);
+    }
+
+    /* After any fault, zero the leading predicated false elements. */
+    swap_memzero(vd, reg_off);
+
+    while (likely((reg_off += 4) < reg_max)) {
+        uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8);
+        if (likely((pg >> (reg_off & 63)) & 1)) {
+            addr = off_fn(vm, reg_off);
+            addr = base + (addr << scale);
+            if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+                record_fault(env, reg_off, reg_max);
+                break;
+            }
+        } else {
+            *(uint32_t *)(vd + H1_4(reg_off)) = 0;
+        }
+    }
 }

-#endif
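The first-fault contract implemented by sve_ldff1_zs above, and by sve_ldff1_zd below: only the first active element may fault architecturally; every later element is probed non-faultingly, and on failure the helper records how far it got (QEMU does this in the FFR predicate via record_fault()). A minimal sketch under those assumptions, with probe() as a hypothetical non-faulting load:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef bool nonfault_load(uint32_t *out, const uint32_t *page, int idx);

    static bool probe(uint32_t *out, const uint32_t *page, int idx)
    {
        if (page == NULL) {
            return false;          /* would fault: report, don't trap */
        }
        *out = page[idx];
        return true;
    }

    static int ldff_gather(uint32_t *dest, const uint32_t *page,
                           const int *idx, int n, nonfault_load *nf)
    {
        dest[0] = page[idx[0]];    /* first active element faults normally */
        for (int i = 1; i < n; i++) {
            if (!nf(&dest[i], page, idx[i])) {
                return i;          /* elements [i, n) marked as faulted */
            }
        }
        return n;
    }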
+static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                                target_ulong base, uint32_t desc, uintptr_t ra,
+                                zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+                                sve_ld1_nf_fn *nonfault_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t reg_off, reg_max = simd_oprsz(desc);
+    target_ulong addr;
+
+    /* Skip to the first true predicate. */
+    reg_off = find_next_active(vg, 0, reg_max, MO_64);
+    if (likely(reg_off < reg_max)) {
+        /* Perform one normal read, which will fault or not. */
+        set_helper_retaddr(ra);
+        addr = off_fn(vm, reg_off);
+        addr = base + (addr << scale);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+        /* The rest of the reads will be non-faulting. */
+        set_helper_retaddr(0);
+    }

-#define DO_LDFF1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \
-    DO_LDFF1_ZPZ(NAME, uint32_t, TYPEI, TYPEM, FN, H1_4)
-#define DO_LDFF1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \
-    DO_LDFF1_ZPZ(NAME, uint64_t, TYPEI, TYPEM, FN, )
-
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zss, int32_t, int16_t, cpu_lduw_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zss, int32_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zss, int32_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zss, int32_t, int32_t, cpu_ldl_data_ra)
-
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zd, uint64_t, int8_t, cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zd, uint64_t, int16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zd, uint64_t, int32_t, cpu_ldl_data_ra)
+    /* After any fault, zero the leading predicated false elements. */
+    swap_memzero(vd, reg_off);

-/* Stores with a vector index. */
+    while (likely((reg_off += 8) < reg_max)) {
+        uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3));
+        if (likely(pg & 1)) {
+            addr = off_fn(vm, reg_off);
+            addr = base + (addr << scale);
+            if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+                record_fault(env, reg_off, reg_max);
+                break;
+            }
+        } else {
+            *(uint64_t *)(vd + reg_off) = 0;
+        }
+    }
+}

-#define DO_ST1_ZPZ_S(NAME, TYPEI, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
+#define DO_LDFF1_ZPZ_S(MEM, OFS) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
 { \
-    intptr_t i, oprsz = simd_oprsz(desc); \
-    unsigned scale = simd_data(desc); \
-    uintptr_t ra = GETPC(); \
-    for (i = 0; i < oprsz; ) { \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
-        do { \
-            if (likely(pg & 1)) { \
-                target_ulong off = *(TYPEI *)(vm + H1_4(i)); \
-                uint32_t d = *(uint32_t *)(vd + H1_4(i)); \
-                FN(env, base + (off << scale), d, ra); \
-            } \
-            i += sizeof(uint32_t), pg >>= sizeof(uint32_t); \
-        } while (i & 15); \
-    } \
+    sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+                 off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
 }

-#define DO_ST1_ZPZ_D(NAME, TYPEI, FN) \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \
-                  target_ulong base, uint32_t desc) \
+#define DO_LDFF1_ZPZ_D(MEM, OFS) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
 { \
-    intptr_t i, oprsz = simd_oprsz(desc) / 8; \
-    unsigned scale = simd_data(desc); \
-    uintptr_t ra = GETPC(); \
-    uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \
-    for (i = 0; i < oprsz; i++) { \
-        if (likely(pg[H1(i)] & 1)) { \
-            target_ulong off = (target_ulong)(TYPEI)m[i] << scale; \
-            FN(env, base + off, d[i], ra); \
-        } \
-    } \
-}
+    sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+                 off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
+}
+
+DO_LDFF1_ZPZ_S(bsu, zsu)
+DO_LDFF1_ZPZ_S(bsu, zss)
+DO_LDFF1_ZPZ_D(bdu, zsu)
+DO_LDFF1_ZPZ_D(bdu, zss)
+DO_LDFF1_ZPZ_D(bdu, zd)
+
+DO_LDFF1_ZPZ_S(bss, zsu)
+DO_LDFF1_ZPZ_S(bss, zss)
+DO_LDFF1_ZPZ_D(bds, zsu)
+DO_LDFF1_ZPZ_D(bds, zss)
+DO_LDFF1_ZPZ_D(bds, zd)
+
+DO_LDFF1_ZPZ_S(hsu_le, zsu)
+DO_LDFF1_ZPZ_S(hsu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zsu)
+DO_LDFF1_ZPZ_D(hdu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zd)
+
+DO_LDFF1_ZPZ_S(hsu_be, zsu)
+DO_LDFF1_ZPZ_S(hsu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zsu)
+DO_LDFF1_ZPZ_D(hdu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zd)
+
+DO_LDFF1_ZPZ_S(hss_le, zsu)
+DO_LDFF1_ZPZ_S(hss_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zsu)
+DO_LDFF1_ZPZ_D(hds_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zd)
+
+DO_LDFF1_ZPZ_S(hss_be, zsu)
+DO_LDFF1_ZPZ_S(hss_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zsu)
+DO_LDFF1_ZPZ_D(hds_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zd)
+
+DO_LDFF1_ZPZ_S(ss_le, zsu)
+DO_LDFF1_ZPZ_S(ss_le, zss)
+DO_LDFF1_ZPZ_D(sdu_le, zsu)
+DO_LDFF1_ZPZ_D(sdu_le, zss)
+DO_LDFF1_ZPZ_D(sdu_le, zd)
+
+DO_LDFF1_ZPZ_S(ss_be, zsu)
+DO_LDFF1_ZPZ_S(ss_be, zss)
+DO_LDFF1_ZPZ_D(sdu_be, zsu)
+DO_LDFF1_ZPZ_D(sdu_be, zss)
+DO_LDFF1_ZPZ_D(sdu_be, zd)
+
+DO_LDFF1_ZPZ_D(sds_le, zsu)
+DO_LDFF1_ZPZ_D(sds_le, zss)
+DO_LDFF1_ZPZ_D(sds_le, zd)
+
+DO_LDFF1_ZPZ_D(sds_be, zsu)
+DO_LDFF1_ZPZ_D(sds_be, zss)
+DO_LDFF1_ZPZ_D(sds_be, zd)
+
+DO_LDFF1_ZPZ_D(dd_le, zsu)
+DO_LDFF1_ZPZ_D(dd_le, zss)
+DO_LDFF1_ZPZ_D(dd_le, zd)
+
+DO_LDFF1_ZPZ_D(dd_be, zsu)
+DO_LDFF1_ZPZ_D(dd_be, zss)
+DO_LDFF1_ZPZ_D(dd_be, zd)

+/* Stores with a vector index. */

+static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc);

-DO_ST1_ZPZ_D(sve_stbd_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zsu, uint32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zsu, uint32_t, cpu_stq_data_ra)
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (likely(pg & 1)) {
+                target_ulong off = off_fn(vm, i);
+                tlb_fn(env, vd, i, base + (off << scale), oi, ra);
+            }
+            i += 4, pg >>= 4;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+}

-DO_ST1_ZPZ_S(sve_stbs_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zsu, uint32_t, cpu_stl_data_ra)
-
-DO_ST1_ZPZ_S(sve_stbs_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zss, int32_t, cpu_stl_data_ra)
-
-DO_ST1_ZPZ_D(sve_stbd_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zss, int32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zss, int32_t, cpu_stq_data_ra)
+static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc) / 8;

-DO_ST1_ZPZ_D(sve_stbd_zd, uint64_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zd, uint64_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zd, uint64_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zd, uint64_t, cpu_stq_data_ra)
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; i++) {
+        uint8_t pg = *(uint8_t *)(vg + H1(i));
+        if (likely(pg & 1)) {
+            target_ulong off = off_fn(vm, i * 8);
+            tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
+        }
+    }
+    set_helper_retaddr(0);
+}
+
+#define DO_ST1_ZPZ_S(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
+{ \
+    sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \
+               off_##OFS##_s, sve_st1##MEM##_tlb); \
+}
+
+#define DO_ST1_ZPZ_D(MEM, OFS) \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
+    (CPUARMState *env, void *vd, void *vg, void *vm, \
+     target_ulong base, uint32_t desc) \
+{ \
+    sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \
+               off_##OFS##_d, sve_st1##MEM##_tlb); \
+}
+
+DO_ST1_ZPZ_S(bs, zsu)
+DO_ST1_ZPZ_S(hs_le, zsu)
+DO_ST1_ZPZ_S(hs_be, zsu)
+DO_ST1_ZPZ_S(ss_le, zsu)
+DO_ST1_ZPZ_S(ss_be, zsu)
+
+DO_ST1_ZPZ_S(bs, zss)
+DO_ST1_ZPZ_S(hs_le, zss)
+DO_ST1_ZPZ_S(hs_be, zss)
+DO_ST1_ZPZ_S(ss_le, zss)
+DO_ST1_ZPZ_S(ss_be, zss)
+
+DO_ST1_ZPZ_D(bd, zsu)
+DO_ST1_ZPZ_D(hd_le, zsu)
+DO_ST1_ZPZ_D(hd_be, zsu)
+DO_ST1_ZPZ_D(sd_le, zsu)
+DO_ST1_ZPZ_D(sd_be, zsu)
+DO_ST1_ZPZ_D(dd_le, zsu)
+DO_ST1_ZPZ_D(dd_be, zsu)
+
+DO_ST1_ZPZ_D(bd, zss)
+DO_ST1_ZPZ_D(hd_le, zss)
+DO_ST1_ZPZ_D(hd_be, zss)
+DO_ST1_ZPZ_D(sd_le, zss)
+DO_ST1_ZPZ_D(sd_be, zss)
+DO_ST1_ZPZ_D(dd_le, zss)
+DO_ST1_ZPZ_D(dd_be, zss)
+
+DO_ST1_ZPZ_D(bd, zd)
+DO_ST1_ZPZ_D(hd_le, zd)
+DO_ST1_ZPZ_D(hd_be, zd)
+DO_ST1_ZPZ_D(sd_le, zd)
+DO_ST1_ZPZ_D(sd_be, zd)
+DO_ST1_ZPZ_D(dd_le, zd)
+DO_ST1_ZPZ_D(dd_be, zd)
+
+#undef DO_ST1_ZPZ_S
+#undef DO_ST1_ZPZ_D
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8ca3876707..88195ab949 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -37,6 +37,7 @@
 #include "trace-tcg.h"
 #include "translate-a64.h"
+#include "qemu/atomic128.h"

 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
@@ -166,11 +167,15 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
         cpu_fprintf(f, "\n");
         return;
     }
+    if (fp_exception_el(env, el) != 0) {
+        cpu_fprintf(f, "    FPU disabled\n");
+        return;
+    }
     cpu_fprintf(f, "     FPCR=%08x FPSR=%08x\n",
                 vfp_get_fpcr(env), vfp_get_fpsr(env));

-    if (arm_feature(env, ARM_FEATURE_SVE)) {
-        int j, zcr_len = env->vfp.zcr_el[1] & 0xf; /* fix for system mode */
+    if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
+        int j, zcr_len = sve_zcr_len_for_el(env, el);

         for (i = 0; i <= FFR_PRED_NUM; i++) {
             bool eol;
@@ -1196,25 +1201,23 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,

 /* Store from vector register to memory */
 static void do_vec_st(DisasContext *s, int srcidx, int element,
-                      TCGv_i64 tcg_addr, int size)
+                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
 {
-    TCGMemOp memop = s->be_data + size;
     TCGv_i64 tcg_tmp = tcg_temp_new_i64();

     read_vec_element(s, tcg_tmp, srcidx, element, size);
-    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);

     tcg_temp_free_i64(tcg_tmp);
 }

 /* Load from memory to vector register */
 static void do_vec_ld(DisasContext *s, int destidx, int element,
-                      TCGv_i64 tcg_addr, int size)
+                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
 {
-    TCGMemOp memop = s->be_data + size;
     TCGv_i64 tcg_tmp = tcg_temp_new_i64();

-    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
     write_vec_element(s, tcg_tmp, destidx, element, size);

     tcg_temp_free_i64(tcg_tmp);
@@ -2082,26 +2085,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                        get_mem_index(s),
                                        MO_64 | MO_ALIGN | s->be_data);
             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
-        } else if (s->be_data == MO_LE) {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            if (!HAVE_CMPXCHG128) {
+                gen_helper_exit_atomic(cpu_env);
+                s->base.is_jmp = DISAS_NORETURN;
+            } else if (s->be_data == MO_LE) {
                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
             } else {
-                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
-            }
-        } else {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
-            } else {
-                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
             }
+        } else if (s->be_data == MO_LE) {
+            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+        } else {
+            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
         }
     } else {
         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
@@ -2171,14 +2175,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         }
         tcg_temp_free_i64(cmp);
     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-        TCGv_i32 tcg_rs = tcg_const_i32(rs);
-
-        if (s->be_data == MO_LE) {
-            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 tcg_rs = tcg_const_i32(rs);
+            if (s->be_data == MO_LE) {
+                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            } else {
+                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            }
+            tcg_temp_free_i32(tcg_rs);
         } else {
-            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            gen_helper_exit_atomic(cpu_env);
+            s->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(tcg_rs);
     } else {
         TCGv_i64 d1 = tcg_temp_new_i64();
         TCGv_i64 d2 = tcg_temp_new_i64();
@@ -2318,7 +2326,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         }
         if (rt2 == 31
             && ((rt | rs) & 1) == 0
-            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            && dc_isar_feature(aa64_atomics, s)) {
            /* CASP / CASPL */
             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
             return;
@@ -2340,7 +2348,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         }
         if (rt2 == 31
             && ((rt | rs) & 1) == 0
-            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            && dc_isar_feature(aa64_atomics, s)) {
             /* CASPA / CASPAL */
             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
             return;
@@ -2351,7 +2359,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
     case 0xb: /* CASL */
     case 0xe: /* CASA */
     case 0xf: /* CASAL */
-        if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
             gen_compare_and_swap(s, rs, rt, rn, size);
             return;
         }
@@ -2890,11 +2898,10 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     int rs = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
-    int feature = ARM_FEATURE_V8_ATOMICS;
     TCGv_i64 tcg_rn, tcg_rs;
     AtomicThreeOpFn *fn;

-    if (is_vector) {
+    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -2930,10 +2937,6 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
-        unallocated_encoding(s);
-        return;
-    }

     if (rn == 31) {
         gen_check_sp_alignment(s);
@@ -3013,10 +3016,11 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
     bool is_store = !extract32(insn, 22, 1);
     bool is_postidx = extract32(insn, 23, 1);
     bool is_q = extract32(insn, 30, 1);
-    TCGv_i64 tcg_addr, tcg_rn;
+    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
+    TCGMemOp endian = s->be_data;

-    int ebytes = 1 << size;
-    int elements = (is_q ? 128 : 64) / (8 << size);
+    int ebytes;   /* bytes per element */
+    int elements; /* elements per vector */
     int rpt;    /* num iterations */
     int selem;  /* structure elements */
     int r;
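The next hunk promotes consecutive little-endian elements from a single register to one wider little-endian access (selem == 1, size forced to 3). This works because N narrow little-endian stores lay bytes out exactly as one wide little-endian store does; a small self-checking sketch of that equivalence:

    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    int main(void)
    {
        uint16_t elems[4] = { 0x1122, 0x3344, 0x5566, 0x7788 };
        uint8_t a[8], b[8];

        /* Four narrow little-endian stores... */
        for (int i = 0; i < 4; i++) {
            a[2 * i]     = elems[i] & 0xff;
            a[2 * i + 1] = elems[i] >> 8;
        }
        /* ...equal one wide little-endian store. */
        uint64_t wide = 0;
        for (int i = 3; i >= 0; i--) {
            wide = (wide << 16) | elems[i];
        }
        for (int i = 0; i < 8; i++) {
            b[i] = (wide >> (8 * i)) & 0xff;
        }
        assert(memcmp(a, b, 8) == 0);
        return 0;
    }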
@@ -3075,39 +3079,55 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
         gen_check_sp_alignment(s);
     }

+    /* For our purposes, bytes are always little-endian.  */
+    if (size == 0) {
+        endian = MO_LE;
+    }
+
+    /* Consecutive little-endian elements from a single register
+     * can be promoted to a larger little-endian operation.
+     */
+    if (selem == 1 && endian == MO_LE) {
+        size = 3;
+    }
+    ebytes = 1 << size;
+    elements = (is_q ? 16 : 8) / ebytes;
+
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_addr = tcg_temp_new_i64();
     tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    tcg_ebytes = tcg_const_i64(ebytes);

     for (r = 0; r < rpt; r++) {
         int e;
         for (e = 0; e < elements; e++) {
-            int tt = (rt + r) % 32;
             int xs;
             for (xs = 0; xs < selem; xs++) {
+                int tt = (rt + r + xs) % 32;
                 if (is_store) {
-                    do_vec_st(s, tt, e, tcg_addr, size);
+                    do_vec_st(s, tt, e, tcg_addr, size, endian);
                 } else {
-                    do_vec_ld(s, tt, e, tcg_addr, size);
-
-                    /* For non-quad operations, setting a slice of the low
-                     * 64 bits of the register clears the high 64 bits (in
-                     * the ARM ARM pseudocode this is implicit in the fact
-                     * that 'rval' is a 64 bit wide variable).
-                     * For quad operations, we might still need to zero the
-                     * high bits of SVE.  We optimize by noticing that we only
-                     * need to do this the first time we touch a register.
-                     */
-                    if (e == 0 && (r == 0 || xs == selem - 1)) {
-                        clear_vec_high(s, is_q, tt);
-                    }
+                    do_vec_ld(s, tt, e, tcg_addr, size, endian);
                 }
-                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
-                tt = (tt + 1) % 32;
+                tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
             }
         }
     }

+    if (!is_store) {
+        /* For non-quad operations, setting a slice of the low
+         * 64 bits of the register clears the high 64 bits (in
+         * the ARM ARM pseudocode this is implicit in the fact
+         * that 'rval' is a 64 bit wide variable).
+         * For quad operations, we might still need to zero the
+         * high bits of SVE.
+         */
+        for (r = 0; r < rpt * selem; r++) {
+            int tt = (rt + r) % 32;
+            clear_vec_high(s, is_q, tt);
+        }
+    }
+
     if (is_postidx) {
         int rm = extract32(insn, 16, 5);
         if (rm == 31) {
@@ -3116,6 +3136,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
     }
+    tcg_temp_free_i64(tcg_ebytes);
     tcg_temp_free_i64(tcg_addr);
 }

@@ -3158,7 +3179,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     bool replicate = false;
     int index = is_q << 3 | S << 2 | size;
     int ebytes, xs;
-    TCGv_i64 tcg_addr, tcg_rn;
+    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;

     switch (scale) {
     case 3:
@@ -3211,49 +3232,28 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_addr = tcg_temp_new_i64();
     tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    tcg_ebytes = tcg_const_i64(ebytes);

     for (xs = 0; xs < selem; xs++) {
         if (replicate) {
             /* Load and replicate to all elements */
-            uint64_t mulconst;
             TCGv_i64 tcg_tmp = tcg_temp_new_i64();

             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                 get_mem_index(s), s->be_data + scale);
-            switch (scale) {
-            case 0:
-                mulconst = 0x0101010101010101ULL;
-                break;
-            case 1:
-                mulconst = 0x0001000100010001ULL;
-                break;
-            case 2:
-                mulconst = 0x0000000100000001ULL;
-                break;
-            case 3:
-                mulconst = 0;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-            if (mulconst) {
-                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
-            }
-            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
-            if (is_q) {
-                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
-            }
+            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
+                                 (is_q + 1) * 8, vec_full_reg_size(s),
+                                 tcg_tmp);
             tcg_temp_free_i64(tcg_tmp);
-            clear_vec_high(s, is_q, rt);
         } else {
             /* Load/store one element per register */
             if (is_load) {
-                do_vec_ld(s, rt, index, tcg_addr, scale);
+                do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data);
             } else {
-                do_vec_st(s, rt, index, tcg_addr, scale);
+                do_vec_st(s, rt, index, tcg_addr, scale, s->be_data);
             }
         }
-        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
+        tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
         rt = (rt + 1) % 32;
     }
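The replicate path above used to broadcast a loaded element by multiplying it with a constant that has a 1 in every Nth bit position; the hunk replaces that with tcg_gen_gvec_dup_i64. A self-checking sketch of the multiply trick being retired:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        uint64_t b = 0xab;
        assert(b * 0x0101010101010101ULL == 0xababababababababULL);

        uint64_t h = 0x1234;
        assert(h * 0x0001000100010001ULL == 0x1234123412341234ULL);

        uint64_t s = 0xdeadbeef;
        assert(s * 0x0000000100000001ULL == 0xdeadbeefdeadbeefULL);
        return 0;
    }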
@@ -3265,6 +3265,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
     }
+    tcg_temp_free_i64(tcg_ebytes);
     tcg_temp_free_i64(tcg_addr);
 }

@@ -4564,7 +4565,7 @@ static void handle_crc32(DisasContext *s,
     TCGv_i64 tcg_acc, tcg_val;
     TCGv_i32 tcg_bytes;

-    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+    if (!dc_isar_feature(aa64_crc32, s)
         || (sf == 1 && sz != 3)
         || (sf == 0 && sz == 3)) {
         unallocated_encoding(s);
@@ -4806,7 +4807,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
         break;
     case 3:
         size = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
            break;
         }
         /* fallthru */
@@ -4857,7 +4858,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
         break;
     case 3:
         size = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -4923,7 +4924,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
         break;
     case 3:
         sz = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -5256,7 +5257,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
         handle_fp_1src_double(s, opcode, rd, rn);
         break;
     case 3:
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -5471,7 +5472,7 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
         handle_fp_2src_double(s, opcode, rd, rn, rm);
         break;
     case 3:
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -5629,7 +5630,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
         break;
     case 3:
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -5699,7 +5700,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
         break;
     case 3:
         sz = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -5924,7 +5925,7 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
     case 1: /* float64 */
         break;
     case 3: /* float16 */
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -6054,7 +6055,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
             break;
         case 0x6: /* 16-bit float, 32-bit int */
         case 0xe: /* 16-bit float, 64-bit int */
-            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (dc_isar_feature(aa64_fp16, s)) {
                 break;
             }
             /* fallthru */
@@ -6081,7 +6082,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
         case 1: /* float64 */
             break;
         case 3: /* float16 */
-            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (dc_isar_feature(aa64_fp16, s)) {
                 break;
             }
             /* fallthru */
@@ -6518,7 +6519,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
          */
         is_min = extract32(size, 1, 1);
         is_fp = true;
-        if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
             size = 1;
         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
             unallocated_encoding(s);
@@ -6914,7 +6915,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)

     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
         /* Check for FMOV (vector, immediate) - half-precision */
-        if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
+        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
             unallocated_encoding(s);
             return;
         }
@@ -7081,7 +7082,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
     case 0x2f: /* FMINP */
         /* FP op, size[0] is 32 or 64 bit*/
         if (!u) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (!dc_isar_feature(aa64_fp16, s)) {
                 unallocated_encoding(s);
                 return;
             } else {
@@ -7726,7 +7727,7 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
         size = MO_32;
     } else if (immh & 2) {
         size = MO_16;
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -7771,7 +7772,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
         size = MO_32;
     } else if (immh & 0x2) {
         size = MO_16;
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -8036,28 +8037,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
     }
 }

-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_and_i32(d, a, b);
-    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_and_i64(d, a, b);
-    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_and_vec(vece, d, a, b);
-    tcg_gen_dupi_vec(vece, a, 0);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
 static void handle_3same_64(DisasContext *s, int opcode, bool u,
                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
 {
@@ -8535,7 +8514,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
         return;
     }

-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
     }

@@ -8608,7 +8587,7 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
     bool u = extract32(insn, 29, 1);
     TCGv_i32 ele1, ele2, ele3;
     TCGv_i64 res;
-    int feature;
+    bool feature;

     switch (u * 16 + opcode) {
     case 0x10: /* SQRDMLAH (vector) */
@@ -8617,13 +8596,13 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_RDM;
+        feature = dc_isar_feature(aa64_rdm, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -9397,191 +9376,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 }

-static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_sari_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_sari_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_sari_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_shri_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
-    tcg_gen_shri_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-
-    tcg_temp_free_vec(t);
-    tcg_temp_free_vec(m);
-}
-
 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
 {
-    static const GVecGen2i ssra_op[4] = {
-        { .fni8 = gen_ssra8_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_8 },
-        { .fni8 = gen_ssra16_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_ssra32_i32,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_ssra64_i64,
-          .fniv = gen_ssra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_64 },
-    };
-    static const GVecGen2i usra_op[4] = {
-        { .fni8 = gen_usra8_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_8, },
-        { .fni8 = gen_usra16_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_16, },
-        { .fni4 = gen_usra32_i32,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_32, },
-        { .fni8 = gen_usra64_i64,
-          .fniv = gen_usra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_64, },
-    };
-    static const GVecGen2i sri_op[4] = {
-        { .fni8 = gen_shr8_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_8 },
-        { .fni8 = gen_shr16_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_shr32_ins_i32,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_shr64_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_64 },
-    };
-
     int size = 32 - clz32(immh) - 1;
     int immhb = immh << 3 | immb;
     int shift = 2 * (8 << size) - immhb;
@@ -9677,85 +9475,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
     clear_vec_high(s, is_q, rd);
 }

-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    uint64_t mask = (1ull << sh) - 1;
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, mask);
-    tcg_gen_shli_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-
-    tcg_temp_free_vec(t);
-    tcg_temp_free_vec(m);
-}
-
 /* SHL/SLI - Vector shift left */
 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                  int immh, int immb, int opcode, int rn, int rd)
 {
-    static const GVecGen2i shi_op[4] = {
-        { .fni8 = gen_shl8_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni8 = gen_shl16_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_shl32_ins_i32,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_shl64_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
     int size = 32 - clz32(immh) - 1;
     int immhb = immh << 3 | immb;
     int shift = immhb - (8 << size);
@@ -9775,7 +9498,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
     }

     if (insert) {
-        gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
+        gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
     } else {
         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
     }
@@ -10352,7 +10075,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             return;
         }
         if (size == 3) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
+            if (!dc_isar_feature(aa64_pmull, s)) {
                 unallocated_encoding(s);
                 return;
             }
@@ -10397,70 +10120,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
     }
 }
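The BSL/BIT/BIF helpers removed below are hoisted into translate.c as shared vector-op tables; the identity they implement is unchanged. For BSL, rd is the select mask and result = (rn & rd) | (rm & ~rd), computed without a NOT as rm ^ ((rn ^ rm) & rd). A self-checking sketch of that xor/and/xor identity:

    #include <stdint.h>
    #include <assert.h>

    static uint64_t bsl(uint64_t rd, uint64_t rn, uint64_t rm)
    {
        return rm ^ ((rn ^ rm) & rd);   /* same op sequence as gen_bsl_i64 */
    }

    int main(void)
    {
        uint64_t rd = 0xff00ff00ff00ff00ULL;   /* the select mask */
        uint64_t rn = 0x1111111111111111ULL;
        uint64_t rm = 0x2222222222222222ULL;
        assert(bsl(rd, rn, rm) == ((rn & rd) | (rm & ~rd)));
        return 0;
    }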
-static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rm);
-    tcg_gen_and_i64(rn, rn, rd);
-    tcg_gen_xor_i64(rd, rm, rn);
-}
-
-static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_and_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_andc_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rm);
-    tcg_gen_and_vec(vece, rn, rn, rd);
-    tcg_gen_xor_vec(vece, rd, rm, rn);
-}
-
-static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_and_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
-static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_andc_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
 /* Logic op (opcode == 3) subgroup of C3.6.16. */
 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 bsl_op = {
-        .fni8 = gen_bsl_i64,
-        .fniv = gen_bsl_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bit_op = {
-        .fni8 = gen_bit_i64,
-        .fniv = gen_bit_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bif_op = {
-        .fni8 = gen_bif_i64,
-        .fniv = gen_bif_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-
     int rd = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     int rm = extract32(insn, 16, 5);
@@ -10732,131 +10394,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
     }
 }

-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_sub_vec(vece, d, d, a);
-}
-
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 cmtst_op[4] = {
-        { .fni4 = gen_helper_neon_tst_u8,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_8 },
-        { .fni4 = gen_helper_neon_tst_u16,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_cmtst_i32,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_cmtst_i64,
-          .fniv = gen_cmtst_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .vece = MO_64 },
-    };
-    static const GVecGen3 mla_op[4] = {
-        { .fni4 = gen_mla8_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni4 = gen_mla16_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_mla32_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_mla64_i64,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-    static const GVecGen3 mls_op[4] = {
-        { .fni4 = gen_mls8_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni4 = gen_mls16_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_mls32_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_mls64_i64,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
     int is_q = extract32(insn, 30, 1);
     int u = extract32(insn, 29, 1);
     int size = extract32(insn, 22, 2);
@@ -11216,7 +10756,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
     TCGv_ptr fpst;
     bool pairwise = false;

-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -11404,7 +10944,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
     int size = extract32(insn, 22, 2);
     bool u = extract32(insn, 29, 1);
     bool is_q = extract32(insn, 30, 1);
-    int feature, rot;
+    bool feature;
+    int rot;

     switch (u * 16 + opcode) {
     case 0x10: /* SQRDMLAH (vector) */
@@ -11413,7 +10954,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_RDM;
+        feature = dc_isar_feature(aa64_rdm, s);
         break;
     case 0x02: /* SDOT (vector) */
     case 0x12: /* UDOT (vector) */
@@ -11421,7 +10962,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_DOTPROD;
+        feature = dc_isar_feature(aa64_dp, s);
         break;
     case 0x18: /* FCMLA, #0 */
     case 0x19: /* FCMLA, #90 */
@@ -11430,18 +10971,18 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
     case 0x1c: /* FCADD, #90 */
     case 0x1e: /* FCADD, #270 */
         if (size == 0
-            || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
+            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
            || (size == 3 && !is_q)) {
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_FCMA;
+        feature = dc_isar_feature(aa64_fcma, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -12310,7 +11851,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
     bool need_fpst = true;
     int rmode;

-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -12655,14 +12196,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         break;
     case 0x1d: /* SQRDMLAH */
     case 0x1f: /* SQRDMLSH */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+        if (!dc_isar_feature(aa64_rdm, s)) {
             unallocated_encoding(s);
             return;
         }
         break;
     case 0x0e: /* SDOT */
     case 0x1e: /* UDOT */
-        if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+        if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -12671,7 +12212,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     case 0x13: /* FCMLA #90 */
     case 0x15: /* FCMLA #180 */
     case 0x17: /* FCMLA #270 */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+        if (!dc_isar_feature(aa64_fcma, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -12727,7 +12268,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         break;
     }
-    if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -13198,8 +12739,7 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
     TCGv_i32 tcg_decrypt;
     CryptoThreeOpIntFn *genfn;

-    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
-        || size != 0) {
+    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
         unallocated_encoding(s);
         return;
     }
@@ -13256,7 +12796,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
     int rd = extract32(insn, 0, 5);
     CryptoThreeOpFn *genfn;
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
-    int feature = ARM_FEATURE_V8_SHA256;
+    bool feature;

     if (size != 0) {
         unallocated_encoding(s);
@@ -13269,23 +12809,26 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
     case 2: /* SHA1M */
     case 3: /* SHA1SU0 */
         genfn = NULL;
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         break;
     case 4: /* SHA256H */
         genfn = gen_helper_crypto_sha256h;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     case 5: /* SHA256H2 */
         genfn = gen_helper_crypto_sha256h2;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     case 6: /* SHA256SU1 */
         genfn = gen_helper_crypto_sha256su1;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }

-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13326,7 +12869,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
     int rn = extract32(insn, 5, 5);
     int rd = extract32(insn, 0, 5);
     CryptoTwoOpFn *genfn;
-    int feature;
+    bool feature;
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;

     if (size != 0) {
@@ -13336,15 +12879,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)

     switch (opcode) {
     case 0: /* SHA1H */
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         genfn = gen_helper_crypto_sha1h;
         break;
     case 1: /* SHA1SU1 */
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         genfn = gen_helper_crypto_sha1su1;
         break;
     case 2: /* SHA256SU0 */
-        feature = ARM_FEATURE_V8_SHA256;
+        feature = dc_isar_feature(aa64_sha256, s);
         genfn = gen_helper_crypto_sha256su0;
         break;
     default:
@@ -13352,7 +12895,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
         return;
     }

-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13383,40 +12926,40 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
     int rm = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
extract32(insn, 0, 5); - int feature; + bool feature; CryptoThreeOpFn *genfn; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); genfn = NULL; break; } } else { switch (opcode) { case 0: /* SM3PARTW1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); genfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4ekey; break; default: @@ -13425,7 +12968,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) } } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13484,16 +13027,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - int feature; + bool feature; CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ - feature = ARM_FEATURE_V8_SHA512; + feature = dc_isar_feature(aa64_sha512, s); genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ - feature = ARM_FEATURE_V8_SM4; + feature = dc_isar_feature(aa64_sm4, s); genfn = gen_helper_crypto_sm4e; break; default: @@ -13501,7 +13044,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13532,22 +13075,22 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) int ra = extract32(insn, 10, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int feature; + bool feature; switch (op0) { case 0: /* EOR3 */ case 1: /* BCAX */ - feature = ARM_FEATURE_V8_SHA3; + feature = dc_isar_feature(aa64_sha3, s); break; case 2: /* SM3SS1 */ - feature = ARM_FEATURE_V8_SM3; + feature = dc_isar_feature(aa64_sm3, s); break; default: unallocated_encoding(s); return; } - if (!arm_dc_feature(s, feature)) { + if (!feature) { unallocated_encoding(s); return; } @@ -13634,7 +13177,7 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; int pass; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) { + if (!dc_isar_feature(aa64_sha3, s)) { unallocated_encoding(s); return; } @@ -13680,7 +13223,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; TCGv_i32 tcg_imm2, tcg_opcode; - if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) { + if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); return; } @@ -13788,7 +13331,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); break; case 0x2: - if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) { + if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) { unallocated_encoding(s); } break; @@ -13829,6 +13372,7 @@ static void 
aarch64_tr_init_disas_context(DisasContextBase *dcbase, ARMCPU *arm_cpu = arm_env_get_cpu(env); int bound; + dc->isar = &arm_cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -13892,7 +13436,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) { - tcg_clear_temp_count(); } static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 667879564f..fe7aebdc19 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4600,62 +4600,97 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; +static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) +{ + return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); +} + static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - gen_helper_gvec_mem *fn) + int dtype, gen_helper_gvec_mem *fn) { unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; - TCGv_i32 desc; + TCGv_i32 t_desc; + int desc; /* For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = tcg_const_i32(simd_desc(vsz, vsz, zt)); + desc = sve_memopidx(s, dtype); + desc |= zt << MEMOPIDX_SHIFT; + desc = simd_desc(vsz, vsz, desc); + t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - fn(cpu_env, t_pg, addr, desc); + fn(cpu_env, t_pg, addr, t_desc); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); } static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[16][4] = { - { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r, - gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r }, - { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r, - gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r }, - { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r, - gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r }, + static gen_helper_gvec_mem * const fns[2][16][4] = { + /* Little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { 
gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* Big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }; - gen_helper_gvec_mem *fn = fns[dtype][nreg]; + gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; /* While there are holes in the table, they are not * accessible via the instruction encoding. */ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, fn); + do_mem_zpa(s, zt, pg, addr, dtype, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) @@ -4689,59 +4724,104 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) { - static gen_helper_gvec_mem * const fns[16] = { - gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_r, - gen_helper_sve_ldff1hh_r, - gen_helper_sve_ldff1hsu_r, - gen_helper_sve_ldff1hdu_r, - - gen_helper_sve_ldff1hds_r, - gen_helper_sve_ldff1hss_r, - gen_helper_sve_ldff1ss_r, - gen_helper_sve_ldff1sdu_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_r, + static gen_helper_gvec_mem * const fns[2][16] = { + /* Little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* Big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + 
gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + fns[s->be_data == MO_BE][a->dtype]); } return true; } static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) { - static gen_helper_gvec_mem * const fns[16] = { - gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_r, - gen_helper_sve_ldnf1hh_r, - gen_helper_sve_ldnf1hsu_r, - gen_helper_sve_ldnf1hdu_r, - - gen_helper_sve_ldnf1hds_r, - gen_helper_sve_ldnf1hss_r, - gen_helper_sve_ldnf1ss_r, - gen_helper_sve_ldnf1sdu_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_r, + static gen_helper_gvec_mem * const fns[2][16] = { + /* Little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, + + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, + + /* Big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, + + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r }, }; if (sve_access_check(s)) { @@ -4751,30 +4831,57 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, + fns[s->be_data == MO_BE][a->dtype]); } return true; } static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) { - static gen_helper_gvec_mem * const fns[4] = { - gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r, - gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r, + static gen_helper_gvec_mem * const fns[2][4] = { + { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r, + gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r }, + { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r, + gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r }, }; unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; - TCGv_i32 desc; + TCGv_i32 t_desc; + int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = tcg_const_i32(simd_desc(16, 16, zt)); + desc = sve_memopidx(s, msz_dtype(msz)); + desc |= zt << MEMOPIDX_SHIFT; + desc = simd_desc(16, 16, desc); + t_desc = tcg_const_i32(desc); + + poff = pred_full_reg_offset(s, pg); + if (vsz > 16) { + /* + * Zero-extend the first 16 bits of the predicate into a temporary. 
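+ * (One predicate bit governs one vector byte, so bits [15:0] are
+ * exactly the bits that cover the 16-byte quadword loaded here.)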
+ * This avoids triggering an assert making sure we don't have bits + * set within a predicate beyond VQ, but we have lowered VQ to 1 + * for this load operation. + */ + TCGv_i64 tmp = tcg_temp_new_i64(); +#ifdef HOST_WORDS_BIGENDIAN + poff += 6; +#endif + tcg_gen_ld16u_i64(tmp, cpu_env, poff); + + poff = offsetof(CPUARMState, vfp.preg_tmp); + tcg_gen_st_i64(tmp, cpu_env, poff); + tcg_temp_free_i64(tmp); + } + t_pg = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(t_pg, cpu_env, poff); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - fns[msz](cpu_env, t_pg, addr, desc); + fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); /* Replicate that first quadword. */ if (vsz > 16) { @@ -4860,35 +4967,73 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[4][4] = { - { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r }, - { NULL, gen_helper_sve_st1hh_r, - gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r }, - { NULL, NULL, - gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r }, - { NULL, NULL, NULL, gen_helper_sve_st1dd_r }, + static gen_helper_gvec_mem * const fn_single[2][4][4] = { + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } }, }; - static gen_helper_gvec_mem * const fn_multiple[3][4] = { - { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r, - gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r }, - { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r, - gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r }, - { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r, - gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r }, + static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } }, }; gen_helper_gvec_mem *fn; + int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[msz][esz]; + fn = fn_single[be][msz][esz]; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[nreg - 1][msz]; + fn = fn_multiple[be][nreg - 1][msz]; } assert(fn != 
NULL); - do_mem_zpa(s, zt, pg, addr, fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn) @@ -4926,111 +5071,203 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn) *** SVE gather loads / scatter stores */ -static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale, - TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn) +static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, + int scale, TCGv_i64 scalar, int msz, + gen_helper_gvec_mem_scatter *fn) { unsigned vsz = vec_full_reg_size(s); - TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale)); TCGv_ptr t_zm = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); + TCGv_i32 t_desc; + int desc; + + desc = sve_memopidx(s, msz_dtype(msz)); + desc |= scale << MEMOPIDX_SHIFT; + desc = simd_desc(vsz, vsz, desc); + t_desc = tcg_const_i32(desc); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); - fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc); + fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc); tcg_temp_free_ptr(t_zt); tcg_temp_free_ptr(t_zm); tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); + tcg_temp_free_i32(t_desc); } -/* Indexed by [ff][xs][u][msz]. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = { - { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_zsu, - gen_helper_sve_ldssu_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_zss, - gen_helper_sve_ldssu_zss, } } }, - - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_zsu, - gen_helper_sve_ldffssu_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_zss, - gen_helper_sve_ldffssu_zss, } } } +/* Indexed by [be][ff][xs][u][msz]. 
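+ * be: big-endian data, ff: first-fault, xs: offset extension
+ * (0 = zero-extended, 1 = sign-extended 32-bit offsets),
+ * u: zero-extending load, msz: log2 of the memory element size.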
*/ +static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { + /* Little-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = { - { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_zsu, - gen_helper_sve_ldsds_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_zsu, - gen_helper_sve_ldsdu_zsu, - gen_helper_sve_ldddu_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_zss, - gen_helper_sve_ldsds_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_zss, - gen_helper_sve_ldsdu_zss, - gen_helper_sve_ldddu_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_zd, - gen_helper_sve_ldsds_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_zd, - gen_helper_sve_ldsdu_zd, - gen_helper_sve_ldddu_zd, } } }, - - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_zsu, - gen_helper_sve_ldffsds_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_zsu, - gen_helper_sve_ldffsdu_zsu, - gen_helper_sve_ldffddu_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_zss, - gen_helper_sve_ldffsds_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_zss, - gen_helper_sve_ldffsdu_zss, - gen_helper_sve_ldffddu_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_zd, - gen_helper_sve_ldffsds_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_zd, - gen_helper_sve_ldffsdu_zd, - gen_helper_sve_ldffddu_zd, } } } +static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { + /* Little-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + 
gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; if (!sve_access_check(s)) { return true; @@ -5038,22 +5275,23 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), fn); + cpu_reg_sp(s, a->rn), a->msz, fn); return true; } static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5065,10 +5303,10 @@ static bool 
trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = gather_load_fn32[a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5077,40 +5315,63 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); tcg_temp_free_i64(imm); return true; } -/* Indexed by [xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = { - { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_zsu, - gen_helper_sve_stss_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_zss, - gen_helper_sve_stss_zss, }, +/* Indexed by [be][xs][msz]. */ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { + /* Little-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + /* Big-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = { - { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_zsu, - gen_helper_sve_stsd_zsu, - gen_helper_sve_stdd_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_zss, - gen_helper_sve_stsd_zss, - gen_helper_sve_stdd_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_zd, - gen_helper_sve_stsd_zd, - gen_helper_sve_stdd_zd, }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { + /* Little-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + /* Big-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn; + int be = s->be_data == MO_BE; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5120,22 +5381,23 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[a->xs][a->msz]; + fn = scatter_store_fn32[be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[a->xs][a->msz]; + fn = scatter_store_fn64[be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), fn); + cpu_reg_sp(s, a->rn), a->msz, fn); return true; } static bool 
trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5147,10 +5409,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[0][a->msz]; + fn = scatter_store_fn32[be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[2][a->msz]; + fn = scatter_store_fn64[be][2][a->msz]; break; } assert(fn != NULL); @@ -5159,7 +5421,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); tcg_temp_free_i64(imm); return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index c6a5d2ac44..7c4675ffd8 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -42,7 +42,7 @@ #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) /* currently all emulated v5 cores are also v5TE, so don't bother */ #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) -#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE) +#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s) #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) @@ -72,7 +72,7 @@ static TCGv_i64 cpu_F0d, cpu_F1d; #include "exec/gen-icount.h" -static const char *regnames[] = +static const char * const regnames[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -239,6 +239,23 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) tcg_temp_free_i32(var); } +/* + * Variant of store_reg which applies v8M stack-limit checks before updating + * SP. If the check fails this will result in an exception being taken. + * We disable the stack checks for CONFIG_USER_ONLY because we have + * no idea what the stack limits should be in that case. + * If stack checking is not being done this just acts like store_reg(). + */ +static void store_sp_checked(DisasContext *s, TCGv_i32 var) +{ +#ifndef CONFIG_USER_ONLY + if (s->v8m_stackcheck) { + gen_helper_v8m_stackcheck(cpu_env, var); + } +#endif + store_reg(s, 13, var); +} + /* Value extensions. */ #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var) #define gen_uxth(var) tcg_gen_ext16u_i32(var, var) @@ -1568,6 +1585,25 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, TCGMemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
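+ * E.g. for 16-bit elements on a big-endian host, element 0 lives in
+ * the two least significant bytes of the uint64_t, i.e. at byte
+ * offset 6, which is what ofs 0 ^ (8 - 2) = 6 produces.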
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + static TCGv_i32 neon_load_reg(int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -1575,12 +1611,94 @@ static TCGv_i32 neon_load_reg(int reg, int pass) return tmp; } +static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(var, cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_store_reg(int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(var); } +static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(var, cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static inline void neon_load_reg64(TCGv_i64 var, int reg) { tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); @@ -2957,19 +3075,6 @@ static void gen_vfp_msr(TCGv_i32 tmp) tcg_temp_free_i32(tmp); } -static void gen_neon_dup_u8(TCGv_i32 var, int shift) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_ext8u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 8); - tcg_gen_or_i32(var, var, tmp); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static void gen_neon_dup_low16(TCGv_i32 var) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -2988,28 +3093,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_temp_free_i32(tmp); } -static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) -{ - /* Load a single Neon element and replicate into a 32 bit TCG reg */ - TCGv_i32 tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. 
*/ - abort(); - } - return tmp; -} - static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, uint32_t dp) { @@ -3415,17 +3498,10 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) tmp = load_reg(s, rd); if (insn & (1 << 23)) { /* VDUP */ - if (size == 0) { - gen_neon_dup_u8(tmp, 0); - } else if (size == 1) { - gen_neon_dup_low16(tmp); - } - for (n = 0; n <= pass * 2; n++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rn, n, tmp2); - } - neon_store_reg(rn, n, tmp); + int vec_size = pass ? 16 : 8; + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0), + vec_size, vec_size, tmp); + tcg_temp_free_i32(tmp); } else { /* VMOV */ switch (size) { @@ -4212,6 +4288,18 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) if (insn & (1 << 24)) /* pre-decrement */ tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2)); + if (s->v8m_stackcheck && rn == 13 && w) { + /* + * Here 'addr' is the lowest address we will store to, + * and is either the old SP (if post-increment) or + * the new SP (if pre-decrement). For post-increment + * where the old value is below the limit and the new + * value is above, it is UNKNOWN whether the limit check + * triggers; we choose to trigger. + */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + if (dp) offset = 8; else @@ -4878,17 +4966,17 @@ static struct { int nregs; int interleave; int spacing; -} neon_ls_element_type[11] = { - {4, 4, 1}, - {4, 4, 2}, +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, {4, 1, 1}, - {4, 2, 1}, - {3, 3, 1}, - {3, 3, 2}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, {3, 1, 1}, {1, 1, 1}, - {2, 2, 1}, - {2, 2, 2}, + {1, 2, 1}, + {1, 2, 2}, {2, 1, 1} }; @@ -4904,10 +4992,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) int stride; int size; int reg; - int pass; int load; - int shift; int n; + int vec_size; + int mmu_idx; + TCGMemOp endian; TCGv_i32 addr; TCGv_i32 tmp; TCGv_i32 tmp2; @@ -4919,7 +5008,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -4929,6 +5018,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) rn = (insn >> 16) & 0xf; rm = insn & 0xf; load = (insn & (1 << 21)) != 0; + endian = s->be_data; + mmu_idx = get_mem_index(s); if ((insn & (1 << 23)) == 0) { /* Load store all elements. */ op = (insn >> 8) & 0xf; @@ -4953,104 +5044,44 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) + if (size == 3 && (interleave | spacing) != 1) { return 1; + } + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
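+ * E.g. with interleave == 1, four consecutive 16-bit LE elements
+ * occupy the same bytes as one 64-bit LE value, so the whole
+ * register can be transferred with a single MO_64 access.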
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(1 << size); load_reg_var(s, addr, rn); - stride = (1 << size) * interleave; for (reg = 0; reg < nregs; reg++) { - if (interleave > 2 || (interleave == 2 && nregs == 2)) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, (1 << size) * reg); - } else if (interleave == 2 && nregs == 4 && reg == 2) { - load_reg_var(s, addr, rn); - tcg_gen_addi_i32(addr, addr, 1 << size); - } - if (size == 3) { - tmp64 = tcg_temp_new_i64(); - if (load) { - gen_aa32_ld64(s, tmp64, addr, get_mem_index(s)); - neon_store_reg64(tmp64, rd); - } else { - neon_load_reg64(tmp64, rd); - gen_aa32_st64(s, tmp64, addr, get_mem_index(s)); - } - tcg_temp_free_i64(tmp64); - tcg_gen_addi_i32(addr, addr, stride); - } else { - for (pass = 0; pass < 2; pass++) { - if (size == 2) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - } - tcg_gen_addi_i32(addr, addr, stride); - } else if (size == 1) { - if (load) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tmp2 = tcg_temp_new_i32(); - gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - gen_aa32_st16(s, tmp2, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_addi_i32(addr, addr, stride); - } - } else /* size == 0 */ { - if (load) { - tmp2 = NULL; - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n == 0) { - tmp2 = tmp; - } else { - tcg_gen_shli_i32(tmp, tmp, n * 8); - tcg_gen_or_i32(tmp2, tmp2, tmp); - tcg_temp_free_i32(tmp); - } - } - neon_store_reg(rd, pass, tmp2); - } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = tcg_temp_new_i32(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, stride); - } - tcg_temp_free_i32(tmp2); - } + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = rd + reg + spacing * xs; + + if (load) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tt, n, size, tmp64); + } else { + neon_load_element64(tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); } + tcg_gen_add_i32(addr, addr, tmp2); } } - rd += spacing; } tcg_temp_free_i32(addr); - stride = nregs * 8; + tcg_temp_free_i32(tmp2); + tcg_temp_free_i64(tmp64); + stride = nregs * interleave * 8; } else { size = (insn >> 10) & 3; if (size == 3) { @@ -5077,45 +5108,50 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); - if (nregs == 1) { - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ - tmp = gen_load_and_replicate(s, addr, 
size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - if (insn & (1 << 5)) { - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); - } - tcg_temp_free_i32(tmp); - } else { - /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ - stride = (insn & (1 << 5)) ? 2 : 1; - for (reg = 0; reg < nregs; reg++) { - tmp = gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, 1 << size); - rd += stride; + + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. + * VLD2/3/4 to all lanes: bit 5 indicates register stride. + */ + stride = (insn & (1 << 5)) ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((rd & 1) && vec_size == 16) { + /* We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), + neon_reg_offset(rd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + vec_size, vec_size, tmp); } + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ int idx = (insn >> 4) & 0xf; - pass = (insn >> 7) & 1; + int reg_idx; switch (size) { case 0: - shift = ((insn >> 5) & 3) * 8; + reg_idx = (insn >> 5) & 7; stride = 1; break; case 1: - shift = ((insn >> 6) & 1) * 16; + reg_idx = (insn >> 6) & 3; stride = (insn & (1 << 5)) ? 2 : 1; break; case 2: - shift = 0; + reg_idx = (insn >> 7) & 1; stride = (insn & (1 << 6)) ? 2 : 1; break; default: @@ -5155,52 +5191,24 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ return 1; } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - if (size != 2) { - tmp2 = neon_load_reg(rd, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, - shift, size ? 16 : 8); - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + neon_store_element(rd, reg_idx, size, tmp); } else { /* Store */ - tmp = neon_load_reg(rd, pass); - if (shift) - tcg_gen_shri_i32(tmp, tmp, shift); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - } - tcg_temp_free_i32(tmp); + neon_load_element(tmp, rd, reg_idx, size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); } rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); stride = nregs * (1 << size); } } @@ -5221,14 +5229,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) return 0; } -/* Bitwise select. dest = c ? 
t : f. Clobbers T and F. */ -static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c) -{ - tcg_gen_and_i32(t, t, c); - tcg_gen_andc_i32(f, f, c); - tcg_gen_or_i32(dest, t, f); -} - static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) { switch (size) { @@ -5435,7 +5435,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VABA 15 #define NEON_3R_VADD_VSUB 16 #define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VML 18 /* VMLA, VMLS */ #define NEON_3R_VMUL 19 #define NEON_3R_VPMAX 20 #define NEON_3R_VPMIN 21 @@ -5660,7 +5660,7 @@ static const uint8_t neon_2rm_sizes[] = { static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm) { - if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (dc_isar_feature(aa32_rdm, s)) { int opr_sz = (1 + q) * 8; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), vfp_reg_offset(1, rn), @@ -5671,6 +5671,483 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, return 1; } +/* + * Expanders for VBitOps_VBIF, VBIT, VBSL. + */ +static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rm); + tcg_gen_and_i64(rn, rn, rd); + tcg_gen_xor_i64(rd, rm, rn); +} + +static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_and_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_xor_i64(rn, rn, rd); + tcg_gen_andc_i64(rn, rn, rm); + tcg_gen_xor_i64(rd, rd, rn); +} + +static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rm); + tcg_gen_and_vec(vece, rn, rn, rd); + tcg_gen_xor_vec(vece, rd, rm, rn); +} + +static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_and_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm) +{ + tcg_gen_xor_vec(vece, rn, rn, rd); + tcg_gen_andc_vec(vece, rn, rn, rm); + tcg_gen_xor_vec(vece, rd, rd, rn); +} + +const GVecGen3 bsl_op = { + .fni8 = gen_bsl_i64, + .fniv = gen_bsl_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bit_op = { + .fni8 = gen_bit_i64, + .fniv = gen_bit_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +const GVecGen3 bif_op = { + .fni8 = gen_bif_i64, + .fniv = gen_bif_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true +}; + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i ssra_op[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = 
gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_sari_vec, + .vece = MO_64 }, +}; + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +const GVecGen2i usra_op[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64, }, +}; + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sri_op[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shri_vec, + .vece = MO_64 }, +}; + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + if (sh == 0) { + tcg_gen_mov_vec(d, a); + } else { + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); + + tcg_temp_free_vec(t); + tcg_temp_free_vec(m); + } +} + +const GVecGen2i sli_op[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opc = INDEX_op_shli_vec, + .vece = MO_64 }, +}; + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. 
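+ * The .opc field in each entry names the vector opcode that the .fniv
+ * expansion requires; a host that cannot emit that opcode falls back
+ * automatically to the integer .fni4/.fni8 versions.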
+ */ +const GVecGen3 mla_op[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +const GVecGen3 mls_op[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .opc = INDEX_op_mul_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .vece = MO_64 }, +}; + +/* CMTST : test is "if (X & Y != 0)". */ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_and_i32(d, a, b); + tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i32(d, d); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_and_i64(d, a, b); + tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_and_vec(vece, d, a, b); + tcg_gen_dupi_vec(vece, a, 0); + tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); +} + +const GVecGen3 cmtst_op[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5680,14 +6157,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { int op; int q; - int rd, rn, rm; + int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; int shift; int pass; int count; int pairwise; int u; - uint32_t imm, mask; + int vec_size; + uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; TCGv_ptr ptr1, ptr2, ptr3; TCGv_i64 tmp64; @@ -5698,7 +6176,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) */ if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } @@ -5710,6 +6188,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); VFP_DREG_M(rm, insn); size = (insn >> 20) & 3; + vec_size = q ? 16 : 8; + rd_ofs = neon_reg_offset(rd, 0); + rn_ofs = neon_reg_offset(rn, 0); + rm_ofs = neon_reg_offset(rm, 0); + if ((insn & (1 << 23)) == 0) { /* Three register same length. 
*/ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); @@ -5734,7 +6217,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } if (!u) { /* SHA-1 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5744,7 +6227,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4); tcg_temp_free_i32(tmp4); } else { /* SHA-256 */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) { + if (!dc_isar_feature(aa32_sha2, s) || size == 3) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -5800,8 +6283,100 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) q, rd, rn, rm); } return 1; + + case NEON_3R_LOGIC: /* Logic ops. */ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 1: /* VBIC */ + tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 2: + if (rn == rm) { + /* VMOV */ + tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); + } else { + /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + break; + case 3: /* VORN */ + tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 4: /* VEOR */ + tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + break; + case 5: /* VBSL */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bsl_op); + break; + case 6: /* VBIT */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bit_op); + break; + case 7: /* VBIF */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &bif_op); + break; + } + return 0; + + case NEON_3R_VADD_VSUB: + if (u) { + tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + + case NEON_3R_VMUL: /* VMUL */ + if (u) { + /* Polynomial case allows only P8 and is handled below. */ + if (size != 0) { + return 1; + } + } else { + tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + return 0; + } + break; + + case NEON_3R_VML: /* VMLA, VMLS */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, + u ? &mls_op[size] : &mla_op[size]); + return 0; + + case NEON_3R_VTST_VCEQ: + if (u) { /* VCEQ */ + tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { /* VTST */ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size, &cmtst_op[size]); + } + return 0; + + case NEON_3R_VCGT: + tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; + + case NEON_3R_VCGE: + tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, + rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return 0; } - if (size == 3 && op != NEON_3R_LOGIC) { + + if (size == 3) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 
2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); @@ -5857,13 +6432,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_sub_i64(CPU_V001); - } else { - tcg_gen_add_i64(CPU_V001); - } - break; default: abort(); } @@ -5913,12 +6481,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } break; - case NEON_3R_VMUL: - if (u && (size != 0)) { - /* UNDEF on invalid size for polynomial subcase */ - return 1; - } - break; case NEON_3R_VFM_VQRDMLSH: if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { return 1; @@ -5959,52 +6521,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - case 1: /* BIC */ - tcg_gen_andc_i32(tmp, tmp, tmp2); - break; - case 2: /* VORR */ - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - case 3: /* VORN */ - tcg_gen_orc_i32(tmp, tmp, tmp2); - break; - case 4: /* VEOR */ - tcg_gen_xor_i32(tmp, tmp, tmp2); - break; - case 5: /* VBSL */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - break; - case 6: /* VBIT */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp3, tmp2); - tcg_temp_free_i32(tmp3); - break; - case 7: /* VBIF */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp3, tmp, tmp2); - tcg_temp_free_i32(tmp3); - break; - } - break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; case NEON_3R_VQSUB: GEN_NEON_INTEGER_OP_ENV(qsub); break; - case NEON_3R_VCGT: - GEN_NEON_INTEGER_OP(cgt); - break; - case NEON_3R_VCGE: - GEN_NEON_INTEGER_OP(cge); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; @@ -6032,61 +6554,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case NEON_3R_VADD_VSUB: - if (!u) { /* VADD */ - gen_neon_add(size, tmp, tmp2); - } else { /* VSUB */ - switch (size) { - case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VTST_VCEQ: - if (!u) { /* VTST */ - switch (size) { - case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } else { /* VCEQ */ - switch (size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: abort(); - } - } - break; - case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; case NEON_3R_VMUL: - if (u) { /* polynomial */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - } else { /* Integer */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, 
tmp2); break; - default: abort(); - } - } + /* VMUL.P8; other cases already eliminated. */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); @@ -6268,8 +6738,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - /* To avoid excessive duplication of ops we implement shift - by immediate using the variable shift operations. */ if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ @@ -6281,43 +6749,99 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* Right shifts are encoded as N - shift, where N is the element size in bits. */ - if (op <= 4) + if (op <= 4) { shift = shift - (1 << (size + 3)); + } + + switch (op) { + case 0: /* VSHR */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, + MIN(shift, (8 << size) - 1), + vec_size, vec_size); + } else if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + return 0; + + case 1: /* VSRA */ + /* Right shift comes here negative. */ + shift = -shift; + /* Shifts larger than the element size are architecturally + * valid. Unsigned results in all zeros; signed results + * in all sign bits. + */ + if (!u) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + MIN(shift, (8 << size) - 1), + &ssra_op[size]); + } else if (shift >= 8 << size) { + /* rd += 0 */ + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &usra_op[size]); + } + return 0; + + case 4: /* VSRI */ + if (!u) { + return 1; + } + /* Right shift comes here negative. */ + shift = -shift; + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size, + shift, &sri_op[size]); + } + return 0; + + case 5: /* VSHL, VSLI */ + if (u) { /* VSLI */ + /* Shift out of range leaves destination unchanged. */ + if (shift < 8 << size) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, + vec_size, shift, &sli_op[size]); + } + } else { /* VSHL */ + /* Shifts larger than the element size are + * architecturally valid and result in zero. + */ + if (shift >= 8 << size) { + tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); + } else { + tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, + vec_size, vec_size); + } + } + return 0; + } + if (size == 3) { count = q + 1; } else { count = q ? 4: 2; } - switch (size) { - case 0: - imm = (uint8_t) shift; - imm |= imm << 8; - imm |= imm << 16; - break; - case 1: - imm = (uint16_t) shift; - imm |= imm << 16; - break; - case 2: - case 3: - imm = shift; - break; - default: - abort(); - } + + /* To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
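+ * dup_const(size, x) replicates the low 8 << size bits of x across all
+ * lanes of a 64-bit value (e.g. dup_const(MO_16, 5) == 0x0005000500050005),
+ * so a single 64-bit constant provides the per-element shift counts.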
+ */ + imm = dup_const(size, shift); for (pass = 0; pass < count; pass++) { if (size == 3) { neon_load_reg64(cpu_V0, rm + pass); tcg_gen_movi_i64(cpu_V1, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ if (u) @@ -6325,10 +6849,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) else gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 6: /* VQSHLU */ gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); @@ -6342,26 +6862,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) cpu_V0, cpu_V1); } break; + default: + g_assert_not_reached(); } - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - neon_load_reg64(cpu_V1, rd + pass); - uint64_t mask; - if (shift < -63 || shift > 63) { - mask = 0; - } else { - if (op == 4) { - mask = 0xffffffffffffffffull >> -shift; - } else { - mask = 0xffffffffffffffffull << shift; - } - } - tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ @@ -6370,23 +6877,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - GEN_NEON_INTEGER_OP(shl); - break; case 2: /* VRSHR */ case 3: /* VRSRA */ GEN_NEON_INTEGER_OP(rshl); break; - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - switch (size) { - case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: abort(); - } - break; case 6: /* VQSHLU */ switch (size) { case 0: @@ -6408,50 +6902,16 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 7: /* VQSHL */ GEN_NEON_INTEGER_OP_ENV(qshl); break; + default: + g_assert_not_reached(); } tcg_temp_free_i32(tmp2); - if (op == 1 || op == 3) { + if (op == 3) { /* Accumulate. 
*/ tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); tcg_temp_free_i32(tmp2); - } else if (op == 4 || (op == 5 && u)) { - /* Insert */ - switch (size) { - case 0: - if (op == 4) - mask = 0xff >> -shift; - else - mask = (uint8_t)(0xff << shift); - mask |= mask << 8; - mask |= mask << 16; - break; - case 1: - if (op == 4) - mask = 0xffff >> -shift; - else - mask = (uint16_t)(0xffff << shift); - mask |= mask << 16; - break; - case 2: - if (shift < -31 || shift > 31) { - mask = 0; - } else { - if (op == 4) - mask = 0xffffffffu >> -shift; - else - mask = 0xffffffffu << shift; - } - break; - default: - abort(); - } - tmp2 = neon_load_reg(rd, pass); - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_andi_i32(tmp2, tmp2, ~mask); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } @@ -6600,7 +7060,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } } else { /* (insn & 0x00380080) == 0 */ - int invert; + int invert, reg_ofs, vec_size; + if (q && (rd & 1)) { return 1; } @@ -6640,8 +7101,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; case 14: imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) + if (invert) { imm = ~imm; + } break; case 15: if (invert) { @@ -6651,36 +7113,45 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; } - if (invert) + if (invert) { imm = ~imm; + } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op & 1 && op < 12) { - tmp = neon_load_reg(rd, pass); - if (invert) { - /* The immediate value has already been inverted, so - BIC becomes AND. */ - tcg_gen_andi_i32(tmp, tmp, imm); - } else { - tcg_gen_ori_i32(tmp, tmp, imm); - } + reg_ofs = neon_reg_offset(rd, 0); + vec_size = q ? 16 : 8; + + if (op & 1 && op < 12) { + if (invert) { + /* The immediate value has already been inverted, + * so BIC becomes AND. + */ + tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); } else { - /* VMOV, VMVN. */ - tmp = tcg_temp_new_i32(); - if (op == 14 && invert) { + tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, + vec_size, vec_size); + } + } else { + /* VMOV, VMVN. 
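+ * For op 14 with invert set, each immediate bit selects a whole byte
+ * of the result (0xff or 0x00) and is assembled per 64-bit lane below;
+ * all other cases dup the (possibly already inverted) 32-bit imm.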
*/ + if (op == 14 && invert) { + TCGv_i64 t64 = tcg_temp_new_i64(); + + for (pass = 0; pass <= q; ++pass) { + uint64_t val = 0; int n; - uint32_t val; - val = 0; - for (n = 0; n < 4; n++) { - if (imm & (1 << (n + (pass & 1) * 4))) - val |= 0xff << (n * 8); + + for (n = 0; n < 8; n++) { + if (imm & (1 << (n + pass * 8))) { + val |= 0xffull << (n * 8); + } } - tcg_gen_movi_i32(tmp, val); - } else { - tcg_gen_movi_i32(tmp, imm); + tcg_gen_movi_i64(t64, val); + neon_store_reg64(t64, rd + pass); } + tcg_temp_free_i64(t64); + } else { + tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm); } - neon_store_reg(rd, pass, tmp); } } } else { /* (insn & 0x00800010 == 0x00800000) */ @@ -6739,7 +7210,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (op == 14 && size == 2) { TCGv_i64 tcg_rn, tcg_rm, tcg_rd; - if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + if (!dc_isar_feature(aa32_pmull, s)) { return 1; } tcg_rn = tcg_temp_new_i64(); @@ -7056,7 +7527,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { NeonGenThreeOpEnvFn *fn; - if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) { + if (!dc_isar_feature(aa32_rdm, s)) { return 1; } if (u && ((rd | rn) & 1)) { @@ -7330,8 +7801,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; } case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7352,8 +7822,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1) - || ((rm | rd) & 1)) { + if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7370,10 +7839,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ if (q) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) { + if (!dc_isar_feature(aa32_sha2, s)) { return 1; } - } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } ptr1 = vfp_reg_ptr(true, rd); @@ -7386,6 +7855,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(ptr1); tcg_temp_free_ptr(ptr2); break; + + case NEON_2RM_VMVN: + tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); + break; + case NEON_2RM_VNEG: + tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size); + break; + default: elementwise: for (pass = 0; pass < (q ? 
4 : 2); pass++) { @@ -7426,9 +7903,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case NEON_2RM_VMVN: - tcg_gen_not_i32(tmp, tmp); - break; case NEON_2RM_VQABS: switch (size) { case 0: @@ -7501,11 +7975,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) default: abort(); } break; - case NEON_2RM_VNEG: - tmp2 = tcg_const_i32(0); - gen_neon_rsb(size, tmp, tmp2); - tcg_temp_free_i32(tmp2); - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); @@ -7728,28 +8197,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + int element; + TCGMemOp size; + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { return 1; } - if (insn & (1 << 19)) { - tmp = neon_load_reg(rm, 1); - } else { - tmp = neon_load_reg(rm, 0); - } if (insn & (1 << 16)) { - gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); + size = MO_8; + element = (insn >> 17) & 7; } else if (insn & (1 << 17)) { - if ((insn >> 18) & 1) - gen_neon_dup_high16(tmp); - else - gen_neon_dup_low16(tmp); - } - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, pass, tmp2); + size = MO_16; + element = (insn >> 18) & 3; + } else { + size = MO_32; + element = (insn >> 19) & 1; } - tcg_temp_free_i32(tmp); + tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), + neon_element_offset(rm, element, size), + q ? 16 : 8, q ? 16 : 8); } else { return 1; } @@ -7784,8 +8250,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 23, 2); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; @@ -7793,15 +8259,15 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ int size = extract32(insn, 20, 1); data = extract32(insn, 24, 1); /* rot */ - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + if (!dc_isar_feature(aa32_vcma, s) + || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { return 1; } fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; } else if ((insn & 0xfeb00f00) == 0xfc200d00) { /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ bool u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? 
gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; @@ -7811,7 +8277,7 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -7863,11 +8329,11 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) int size = extract32(insn, 23, 1); int index; - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + if (!dc_isar_feature(aa32_vcma, s)) { return 1; } if (size == 0) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + if (!dc_isar_feature(aa32_fp16_arith, s)) { return 1; } /* For fp16, rm is just Vm, and index is M. */ @@ -7884,7 +8350,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) } else if ((insn & 0xffb00f00) == 0xfe200d00) { /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ int u = extract32(insn, 4, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + if (!dc_isar_feature(aa32_dp, s)) { return 1; } fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; @@ -7897,7 +8363,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) if (s->fp_excp_el) { gen_exception_insn(s, 4, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); return 0; } if (!s->vfp_enabled) { @@ -8860,8 +9326,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. * Bits 8, 10 and 11 should be zero. */ - if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 || - (c & 0xd) != 0) { + if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) { goto illegal_op; } @@ -9729,7 +10194,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) case 1: case 3: /* SDIV, UDIV */ - if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) { + if (!dc_isar_feature(arm_div, s)) { goto illegal_op; } if (((insn >> 5) & 7) || (rd != 15)) { @@ -10261,6 +10726,8 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx * - load/store dual (pre-indexed) */ + bool wback = extract32(insn, 21, 1); + if (rn == 15) { if (insn & (1 << 21)) { /* UNPREDICTABLE */ @@ -10272,8 +10739,29 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) addr = load_reg(s, rn); } offset = (insn & 0xff) * 4; - if ((insn & (1 << 23)) == 0) + if ((insn & (1 << 23)) == 0) { offset = -offset; + } + + if (s->v8m_stackcheck && rn == 13 && wback) { + /* + * Here 'addr' is the current SP; if offset is +ve we're + * moving SP up, else down. It is UNKNOWN whether the limit + * check triggers when SP starts below the limit and ends + * up above it; check whichever of the current and final + * SP is lower, so QEMU will trigger in that situation. + */ + if ((int32_t)offset < 0) { + TCGv_i32 newsp = tcg_temp_new_i32(); + + tcg_gen_addi_i32(newsp, addr, offset); + gen_helper_v8m_stackcheck(cpu_env, newsp); + tcg_temp_free_i32(newsp); + } else { + gen_helper_v8m_stackcheck(cpu_env, addr); + } + } + if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, offset); offset = 0; @@ -10297,7 +10785,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) gen_aa32_st32(s, tmp, addr, get_mem_index(s)); tcg_temp_free_i32(tmp); } - if (insn & (1 << 21)) { + if (wback) { /* Base writeback. 
*/ tcg_gen_addi_i32(addr, addr, offset - 4); store_reg(s, rn, addr); } @@ -10484,6 +10972,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } else { int i, loaded_base = 0; TCGv_i32 loaded_var; + bool wback = extract32(insn, 21, 1); /* Load/store multiple. */ addr = load_reg(s, rn); offset = 0; @@ -10491,10 +10980,26 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (insn & (1 << i)) offset += 4; } + if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, -offset); } + if (s->v8m_stackcheck && rn == 13 && wback) { + /* + * If the writeback is incrementing SP rather than + * decrementing it, and the initial SP is below the + * stack limit but the final written-back SP would + * be above, then we must not perform any memory + * accesses, but it is IMPDEF whether we generate + * an exception. We choose to do so in this case. + * At this point 'addr' is the lowest address, i.e. + * either the original SP (if incrementing) or our + * final SP (if decrementing), so that's what we check. + */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + loaded_var = NULL; for (i = 0; i < 16; i++) { if ((insn & (1 << i)) == 0) @@ -10522,7 +11027,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (loaded_base) { store_reg(s, rn, loaded_var); } - if (insn & (1 << 21)) { + if (wback) { /* Base register writeback. */ if (insn & (1 << 24)) { tcg_gen_addi_i32(addr, addr, -offset); @@ -10583,7 +11088,13 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) goto illegal_op; tcg_temp_free_i32(tmp2); - if (rd != 15) { + if (rd == 13 && + ((op == 2 && rn == 15) || + (op == 8 && rn == 13) || + (op == 13 && rn == 13))) { + /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */ + store_sp_checked(s, tmp); + } else if (rd != 15) { store_reg(s, rd, tmp); } else { tcg_temp_free_i32(tmp); @@ -10600,6 +11111,10 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((insn & 0x70) != 0) goto illegal_op; + /* + * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx: + * - MOV, MOVS (register-shifted register), flagsetting + */ op = (insn >> 21) & 3; logic_cc = (insn & (1 << 20)) != 0; gen_arm_shift_reg(tmp, op, tmp2, logic_cc); @@ -10706,7 +11221,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) case 0x28: case 0x29: case 0x2a: - if (!arm_dc_feature(s, ARM_FEATURE_CRC)) { + if (!dc_isar_feature(aa32_crc32, s)) { goto illegal_op; } break; @@ -10887,7 +11402,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); if ((op & 0x50) == 0x10) { /* sdiv, udiv */ - if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) { + if (!dc_isar_feature(thumb_div, s)) { goto illegal_op; } if (op & 0x20) @@ -11267,8 +11782,15 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) gen_jmp(s, s->pc + offset); } } else { - /* Data processing immediate. */ + /* + * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx + * - Data-processing (modified immediate, plain binary immediate) + */ if (insn & (1 << 25)) { + /* + * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx + * - Data-processing (plain binary immediate) + */ if (insn & (1 << 24)) { if (insn & (1 << 20)) goto illegal_op; @@ -11364,6 +11886,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, imm); } + store_reg(s, rd, tmp); } else { /* Add/sub 12-bit immediate. 
*/ if (rn == 15) { @@ -11374,17 +11897,27 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) offset += imm; tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, offset); + store_reg(s, rd, tmp); } else { tmp = load_reg(s, rn); if (insn & (1 << 23)) tcg_gen_subi_i32(tmp, tmp, imm); else tcg_gen_addi_i32(tmp, tmp, imm); + if (rn == 13 && rd == 13) { + /* ADD SP, SP, imm or SUB SP, SP, imm */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } } } - store_reg(s, rd, tmp); } } else { + /* + * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx + * - Data-processing (modified immediate) + */ int shifter_out = 0; /* modified 12-bit immediate. */ shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12); @@ -11426,7 +11959,11 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) goto illegal_op; tcg_temp_free_i32(tmp2); rd = (insn >> 8) & 0xf; - if (rd != 15) { + if (rd == 13 && rn == 13 + && (op == 8 || op == 13)) { + /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */ + store_sp_checked(s, tmp); + } else if (rd != 15) { store_reg(s, rd, tmp); } else { tcg_temp_free_i32(tmp); @@ -11535,7 +12072,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) imm = -imm; /* Fall through. */ case 0xf: /* Pre-increment. */ - tcg_gen_addi_i32(addr, addr, imm); writeback = 1; break; default: @@ -11547,6 +12083,28 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) issinfo = writeback ? ISSInvalid : rs; + if (s->v8m_stackcheck && rn == 13 && writeback) { + /* + * Stackcheck. Here we know 'addr' is the current SP; + * if imm is +ve we're moving SP up, else down. It is + * UNKNOWN whether the limit check triggers when SP starts + * below the limit and ends up above it; we choose to trigger + * in that case. + */ + if ((int32_t)imm < 0) { + TCGv_i32 newsp = tcg_temp_new_i32(); + + tcg_gen_addi_i32(newsp, addr, imm); + gen_helper_v8m_stackcheck(cpu_env, newsp); + tcg_temp_free_i32(newsp); + } else { + gen_helper_v8m_stackcheck(cpu_env, addr); + } + } + + if (writeback && !postinc) { + tcg_gen_addi_i32(addr, addr, imm); + } + if (insn & (1 << 20)) { /* Load. 
*/ tmp = tcg_temp_new_i32(); @@ -11629,7 +12187,11 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) rd = insn & 7; op = (insn >> 11) & 3; if (op == 3) { - /* add/subtract */ + /* + * 0b0001_1xxx_xxxx_xxxx + * - Add, subtract (three low registers) + * - Add, subtract (two low registers and immediate) + */ rn = (insn >> 3) & 7; tmp = load_reg(s, rn); if (insn & (1 << 10)) { @@ -11666,7 +12228,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) } break; case 2: case 3: - /* arithmetic large immediate */ + /* + * 0b001x_xxxx_xxxx_xxxx + * - Add, subtract, compare, move (one low register and immediate) + */ op = (insn >> 11) & 3; rd = (insn >> 8) & 0x7; if (op == 0) { /* mov */ @@ -11732,7 +12297,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) tmp2 = load_reg(s, rm); tcg_gen_add_i32(tmp, tmp, tmp2); tcg_temp_free_i32(tmp2); - store_reg(s, rd, tmp); + if (rd == 13) { + /* ADD SP, SP, reg */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } break; case 1: /* cmp */ tmp = load_reg(s, rd); @@ -11743,7 +12313,12 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; case 2: /* mov/cpy */ tmp = load_reg(s, rm); - store_reg(s, rd, tmp); + if (rd == 13) { + /* MOV SP, reg */ + store_sp_checked(s, tmp); + } else { + store_reg(s, rd, tmp); + } break; case 3: { @@ -11793,7 +12368,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; } - /* data processing register */ + /* + * 0b0100_00xx_xxxx_xxxx + * - Data-processing (two low registers) + */ rd = insn & 7; rm = (insn >> 3) & 7; op = (insn >> 6) & 0xf; @@ -12071,7 +12649,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) break; case 10: - /* add to high reg */ + /* + * 0b1010_xxxx_xxxx_xxxx + * - Add PC/SP (immediate) + */ rd = (insn >> 8) & 7; if (insn & (1 << 11)) { /* SP */ @@ -12091,13 +12672,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) op = (insn >> 8) & 0xf; switch (op) { case 0: - /* adjust stack pointer */ + /* + * 0b1011_0000_xxxx_xxxx + * - ADD (SP plus immediate) + * - SUB (SP minus immediate) + */ tmp = load_reg(s, 13); val = (insn & 0x7f) * 4; if (insn & (1 << 7)) val = -(int32_t)val; tcg_gen_addi_i32(tmp, tmp, val); - store_reg(s, 13, tmp); + store_sp_checked(s, tmp); break; case 2: /* sign/zero extend. */ @@ -12114,7 +12699,10 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) store_reg(s, rd, tmp); break; case 4: case 5: case 0xc: case 0xd: - /* push/pop */ + /* + * 0b1011_x10x_xxxx_xxxx + * - push/pop + */ addr = load_reg(s, 13); if (insn & (1 << 8)) offset = 4; @@ -12127,6 +12715,17 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn) if ((insn & (1 << 11)) == 0) { tcg_gen_addi_i32(addr, addr, -offset); } + + if (s->v8m_stackcheck) { + /* + * Here 'addr' is the lower of "old SP" and "new SP"; + * if this is a pop that starts below the limit and ends + * above it, it is UNKNOWN whether the limit check triggers; + * we choose to trigger. 
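+ * (The stackcheck helper compares its argument against the active
+ * stack limit and raises a v8M STKOF UsageFault if it is below it.)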
+ */ + gen_helper_v8m_stackcheck(cpu_env, addr); + } + for (i = 0; i < 8; i++) { if (insn & (1 << i)) { if (insn & (1 << 11)) { @@ -12423,6 +13022,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUARMState *env = cs->env_ptr; ARMCPU *cpu = arm_env_get_cpu(env); + dc->isar = &cpu->isar; dc->pc = dc->base.pc_first; dc->condjmp = 0; @@ -12451,6 +13051,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags); dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && regime_is_secure(env, dc->mmu_idx); + dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags); dc->cp_regs = cpu->cp_regs; dc->features = env->features; @@ -12539,7 +13140,6 @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) tcg_gen_movi_i32(tmp, 0); store_cpu_field(tmp, condexec_bits); } - tcg_clear_temp_count(); } static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) @@ -12928,11 +13528,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) translator_loop(ops, &dc.base, cpu, tb); } -static const char *cpu_mode_names[16] = { - "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", - "???", "???", "hyp", "und", "???", "???", "???", "sys" -}; - void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -12998,7 +13593,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, psr & CPSR_V ? 'V' : '-', psr & CPSR_T ? 'T' : 'A', ns_status, - cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); + aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26); } if (flags & CPU_DUMP_FPU) { diff --git a/target/arm/translate.h b/target/arm/translate.h index 45f04244be..1550aa8bc7 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -7,6 +7,7 @@ /* internal defines */ typedef struct DisasContext { DisasContextBase base; + const ARMISARegisters *isar; target_ulong pc; target_ulong page_start; @@ -38,6 +39,7 @@ typedef struct DisasContext { int vec_stride; bool v7m_handler_mode; bool v8m_secure; /* true if v8M and we're in Secure mode */ + bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */ /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI * so that top level loop can generate correct syndrome information. */ @@ -189,4 +191,24 @@ static inline TCGv_i32 get_ahp_flag(void) return ret; } + +/* Vector operations shared between ARM and AArch64. */ +extern const GVecGen3 bsl_op; +extern const GVecGen3 bit_op; +extern const GVecGen3 bif_op; +extern const GVecGen3 mla_op[4]; +extern const GVecGen3 mls_op[4]; +extern const GVecGen3 cmtst_op[4]; +extern const GVecGen2i ssra_op[4]; +extern const GVecGen2i usra_op[4]; +extern const GVecGen2i sri_op[4]; +extern const GVecGen2i sli_op[4]; +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); + +/* + * Forward to the isar_feature_* tests given a DisasContext pointer. + */ +#define dc_isar_feature(name, ctx) \ + ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); }) + #endif /* TARGET_ARM_TRANSLATE_H */
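The new VBSL/VBIT/VBIF expanders above all use the same three-op bitwise-select identity, sel ? n : m == m ^ ((n ^ m) & sel), differing only in which operand supplies the select mask (rd for VBSL, rm for VBIT, ~rm for VBIF). As a standalone illustration, independent of the patch and of QEMU, the following host C program checks that the xor form agrees with the conventional and/or form that the deleted gen_neon_bsl used:

#include <assert.h>
#include <stdint.h>

/* Conventional select: take n where sel is 1, m where sel is 0. */
static uint64_t bsl_and_or(uint64_t sel, uint64_t n, uint64_t m)
{
    return (n & sel) | (m & ~sel);
}

/* The xor form used by gen_bsl_i64/gen_bsl_vec above. */
static uint64_t bsl_xor(uint64_t sel, uint64_t n, uint64_t m)
{
    return m ^ ((n ^ m) & sel);
}

int main(void)
{
    static const uint64_t v[] = {
        0, ~(uint64_t)0, 0x0123456789abcdefull, 0xfedcba9876543210ull,
    };
    int i, j, k;

    for (i = 0; i < 4; i++) {
        for (j = 0; j < 4; j++) {
            for (k = 0; k < 4; k++) {
                assert(bsl_and_or(v[i], v[j], v[k]) ==
                       bsl_xor(v[i], v[j], v[k]));
            }
        }
    }
    return 0;
}

The xor form needs no mask temporary and clobbers only one source, which is why the same three-instruction pattern serves gen_bsl_*, gen_bit_* and gen_bif_* with the operands merely permuted (and andc substituted for the inverted mask in VBIF).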